In [1]:
import os
import pandas as pd
from sqlalchemy import create_engine
from dotenv import dotenv_values
from datetime import datetime

In [2]:
def get_latest_file(folder_path):
    """Return the most recently modified regular file directly inside a folder.

    Sub-directories are skipped: ``os.listdir`` returns them alongside files,
    and a folder whose modification time is newer than every file would
    otherwise be returned as the "latest file".

    Args:
        folder_path: Directory to inspect (not searched recursively).

    Returns:
        ``folder_path`` joined with the name of the file that has the newest
        modification time, or ``None`` when the folder holds no regular files.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder_path`` cannot be
            listed (for example, it does not exist).
    """
    entries = (os.path.join(folder_path, name) for name in os.listdir(folder_path))
    candidates = [path for path in entries if os.path.isfile(path)]

    # default=None covers the empty case without a separate branch.
    return max(candidates, key=os.path.getmtime, default=None)

def read_latest_file_into_dataframe(folder_path):
    """Load the newest file found in ``folder_path`` into a pandas DataFrame.

    The file is chosen by ``get_latest_file``. Files ending in ``.csv`` are
    read with ``pd.read_csv``; files ending in ``.xls`` or ``.xlsx`` are read
    with ``pd.read_excel``. The extension check is case-insensitive.

    Args:
        folder_path: Directory to look in.

    Returns:
        The loaded DataFrame, or ``None`` (after printing a message) when no
        file is found or the newest file has an unsupported extension.
    """
    newest_path = get_latest_file(folder_path)

    # Guard clause: nothing to read.
    if not newest_path:
        print(f"No files found in {folder_path}")
        return None

    lowered_path = newest_path.lower()

    if lowered_path.endswith('.csv'):
        return pd.read_csv(newest_path)

    if lowered_path.endswith(('.xls', '.xlsx')):
        return pd.read_excel(newest_path)

    # Any other extension is reported and treated as "no data".
    print(f"Unsupported file format for {newest_path}")
    return None

In [3]:
# Folder that holds the schedule exports; the newest file in it is loaded.
# NOTE(review): absolute, user-specific path - consider making it configurable.
folder_path = r"C:\Users\Temidayo\Desktop\Test_Question\Schedule_Folder"
latest_df = read_latest_file_into_dataframe(folder_path)

# Stop here with a clear message when nothing could be loaded; otherwise the
# preview below fails with an opaque AttributeError on None.
if latest_df is None:
    raise FileNotFoundError(f"No readable CSV/Excel file found in {folder_path}")

print("DataFrame created from the latest file: File Available")

latest_df.head(10)

DataFrame created from the latest file: File Available


Unnamed: 0,loan_id,schedule_id,Expected_payment_date,Expected_payment_amount
0,32u09wekjbfje,3434r409km123456,2021-02-27,100790.333333
1,32u09wekjbfje,3434r409km123457,2021-03-27,100790.333333
2,32u09wekjbfje,3434r409km123458,2021-04-27,100790.333333
3,32u09wekjbfje,3434r409km123459,2021-05-27,100790.333333
4,32u09wekjbfje,3434r409km123460,2021-06-27,100790.333333
5,32u09wekjbfje,3434r409km123461,2021-07-27,100790.333333
6,32u09wekjbfje,3434r409km123462,2021-08-27,100790.333333
7,32u09wekjbfje,3434r409km123463,2021-09-27,100790.333333
8,32u09wekjbfje,3434r409km123464,2021-10-27,100790.333333
9,32u09wekjbfje,3434r409km123465,2021-11-27,100790.333333


In [4]:
# Print the column names, non-null counts and dtypes of the loaded data.
latest_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84 entries, 0 to 83
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   loan_id                  84 non-null     object        
 1   schedule_id              84 non-null     object        
 2   Expected_payment_date    84 non-null     datetime64[ns]
 3   Expected_payment_amount  84 non-null     float64       
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 2.8+ KB


In [5]:
# Stamp every row with the load time as a timezone-naive UTC timestamp.
# datetime.utcnow() is deprecated since Python 3.12, so take an aware UTC
# "now" and strip the tzinfo, which keeps the column naive exactly as before.
latest_df['ingestion_date'] = pd.Timestamp.now(tz="UTC").tz_localize(None)

# Display the updated DataFrame
latest_df.head()

Unnamed: 0,loan_id,schedule_id,Expected_payment_date,Expected_payment_amount,ingestion_date
0,32u09wekjbfje,3434r409km123456,2021-02-27,100790.333333,2024-08-06 19:37:42.525574
1,32u09wekjbfje,3434r409km123457,2021-03-27,100790.333333,2024-08-06 19:37:42.525574
2,32u09wekjbfje,3434r409km123458,2021-04-27,100790.333333,2024-08-06 19:37:42.525574
3,32u09wekjbfje,3434r409km123459,2021-05-27,100790.333333,2024-08-06 19:37:42.525574
4,32u09wekjbfje,3434r409km123460,2021-06-27,100790.333333,2024-08-06 19:37:42.525574


In [6]:
# Step 3: Load environment variables from .env file
# NOTE(review): absolute, user-specific path - consider making it configurable.
env_dir = r'C:\Users\Temidayo\Desktop\Test_Question\Credentials'
env_values = dotenv_values(os.path.join(env_dir, '.env'))

# Fail fast when a required setting is absent or empty. Without this check a
# missing key becomes None and ends up as the text "None" in the connection
# URL, which only surfaces later as a confusing connection error.
missing_settings = [
    key
    for key in ("sql_server", "sql_database", "sql_username", "sql_password")
    if not env_values.get(key)
]
if missing_settings:
    raise KeyError(
        f"Missing settings in {os.path.join(env_dir, '.env')}: {', '.join(missing_settings)}"
    )

# Azure SQL Database details
sql_server = env_values["sql_server"]
sql_database = env_values["sql_database"]
sql_username = env_values["sql_username"]
sql_password = env_values["sql_password"]
sql_driver = 'ODBC Driver 17 for SQL Server'
schema_name = 'autocheck'
table_name = 'schedule'

In [7]:
# Function to load DataFrame to SQL database
def load_data_to_sql(latest_df, table_name, if_exists='replace'):
    """Write a DataFrame to ``schema_name.table_name`` in the SQL database.

    Connection settings (``sql_username``, ``sql_password``, ``sql_server``,
    ``sql_database``, ``sql_driver``) and ``schema_name`` are read from
    module-level variables.

    Args:
        latest_df: DataFrame to write; its index is not written.
        table_name: Target table name inside ``schema_name``.
        if_exists: Forwarded to ``DataFrame.to_sql``. The default
            ``'replace'`` drops and recreates the table, which is what this
            function has always done even though it used to report the rows
            as "appended". Pass ``'append'`` to keep existing rows.

    Returns:
        None. Any exception is caught and reported with ``print`` rather than
        raised, so the caller is not interrupted.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote, quote_plus

    engine = None
    try:
        # Percent-encode the credentials so characters such as '@', ':' or '/'
        # in the username or password cannot break URL parsing.
        connection_string = (
            f"mssql+pyodbc://{quote(sql_username, safe='')}:{quote(sql_password, safe='')}"
            f"@{sql_server}/{sql_database}?driver={quote_plus(sql_driver)}"
        )
        engine = create_engine(connection_string)

        latest_df.to_sql(table_name, engine, schema=schema_name, if_exists=if_exists, index=False)

        # Report what actually happened instead of always claiming "appended".
        outcome = {
            'append': "Data appended to",
            'replace': "Data written (existing table replaced) to",
        }.get(if_exists, "Data written to")
        print(f"{outcome} Azure SQL Database table {schema_name}.{table_name} successfully.")
    except Exception as e:
        print(f"Error writing to SQL Database: {str(e)}")
    finally:
        # Release pooled connections; the engine is not reused after this call.
        if engine is not None:
            engine.dispose()

# Only attempt the load when a DataFrame was actually produced; the reader
# returns None when no file is found or the newest file is unsupported.
if latest_df is not None:
    load_data_to_sql(latest_df, table_name)

Data appended to Azure SQL Database table autocheck.schedule successfully.
