## **To Merge the Provided Dataset**

In [None]:
import pandas as pd
import glob

def fill_missing_dates(exchange_rate_df):
    """Return a daily, gap-free copy of one exchange-rate report.

    Parameters
    ----------
    exchange_rate_df : pandas.DataFrame
        Raw report with a 'Date' column of strings in '%d-%b-%y' format
        (e.g. '03-Jan-13'). All other columns are carried along.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by every calendar day from 1 January of the earliest
        year present up to the latest date present. The input is not
        modified. An empty frame is returned when no row has a parseable date.
    """
    # Step 1: Convert the "Date" column to datetime and drop rows where the conversion failed
    parsed_dates = pd.to_datetime(exchange_rate_df['Date'], format='%d-%b-%y', errors='coerce')
    dated_df = exchange_rate_df.assign(Date=parsed_dates).dropna(subset=['Date'])

    # reindex() raises on duplicate index labels, so keep the first row seen for each date
    dated_df = dated_df.drop_duplicates(subset='Date', keep='first').set_index('Date')
    if dated_df.empty:
        # No usable dates: there is no range to build
        return dated_df

    # Step 2: Create a full date range from January 1st to the maximum date in the dataset
    full_date_range = pd.date_range(start=f'{dated_df.index.min().year}-01-01', end=dated_df.index.max())

    # Step 3: Reindex the DataFrame to include all dates in the range
    reindexed_df = dated_df.reindex(full_date_range)

    # Step 4: Forward fill gaps with the last known value first; the backward fill
    # then only touches the leading days that have no earlier observation.
    # (bfill().ffill() would instead copy *future* values into every interior gap.)
    filled_df = reindexed_df.ffill().bfill()

    # Step 5: Fill any remaining missing values with the column mean.
    # numeric_only=True keeps text columns from raising in mean().
    return filled_df.fillna(filled_df.mean(numeric_only=True))


# Sort the matches so the merged output does not depend on filesystem listing order
file_paths = sorted(glob.glob('Exchange_Rate_Report_*.csv'))

processed_dfs = []

for file_path in file_paths:
    filled_df = fill_missing_dates(pd.read_csv(file_path))
    if filled_df.empty:
        print(f"Skipping {file_path}: no rows with a valid date.")
        continue

    # Append the cleaned and filled DataFrame to the list
    processed_dfs.append(filled_df)

if processed_dfs:
    final_df = pd.concat(processed_dfs)
    # The date index has no name, so it is written as an unnamed first column
    final_df.to_csv('Merged_Exchange_Rate_2013_2022.csv')

    print("All CSV files processed, missing values handled, and merged into 'Merged_Exchange_Rate_2013_2022.csv'.")
else:
    # pd.concat raises on an empty list, so report the situation instead
    print("No files matching 'Exchange_Rate_Report_*.csv' produced data; nothing was written.")

## **To Merge the IMF Dataset**

In [None]:
import os
import glob
import pandas as pd

# Specify the folder path where your files are located
folder_path = 'path_to_your_csv_folder'  # Update this path with the actual folder path

# Change directory to the folder containing the CSV files.
# NOTE: this changes the working directory for every later cell as well, and
# re-running this cell with a relative folder_path will fail after the first run.
os.chdir(folder_path)

# Use glob to find all CSV files that end with '_transposed.csv';
# sorted so the merge order does not depend on filesystem listing order
file_paths = sorted(glob.glob('*_transposed.csv'))
print("CSV files found:", file_paths)  # This will print the list of found files

# Check if any files were found
if not file_paths:
    print("No files matching the pattern were found.")
else:
    processed_dfs = []
    for file_path in file_paths:
        try:
            # Read CSV
            df = pd.read_csv(file_path)
            print(f"Processing file: {file_path}")
            print(df.head())  # Display the first few rows

            # Forward fill missing values. DataFrame.ffill() replaces the
            # deprecated fillna(method='ffill') spelling.
            filled_df = df.ffill()  # Adjust your processing logic as needed
            processed_dfs.append(filled_df)
        except Exception as e:
            # Best effort: report the failing file and continue with the others
            print(f"Error processing {file_path}: {e}")

    # Concatenate all DataFrames if they exist
    if processed_dfs:
        final_df = pd.concat(processed_dfs)
        # NOTE(review): same file name as the output of the provided-dataset merge
        # cell; confirm this is the intended name for the IMF data.
        final_df.to_csv('Merged_Exchange_Rate_2013_2022.csv', index=False)  # Save the merged CSV
        print("All CSV files processed and merged into 'Merged_Exchange_Rate_2013_2022.csv'.")
    else:
        print("No valid DataFrames to concatenate.")


## **Final Formatting of Both Files Before Merging**

In [None]:
import pandas as pd

# Load the CSV file into a pandas DataFrame
df = pd.read_csv('path_to_your_csv_file/Converted_Exchange_Rate.csv')  # Update with the correct path

# Convert 'Unnamed: 0' column to datetime, and format it as 'YYYY-MM-DD'
#df['Unnamed: 0'] = pd.to_datetime(df['Unnamed: 0'], format='%B %d, %Y').dt.strftime('%Y-%m-%d')

# Rename the column to 'Date' (a no-op when the column is already named 'Date'),
# then trim the headers and collapse runs of whitespace inside them
df = df.rename(columns={'Unnamed: 0': 'Date'})
df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)

# Sort all columns except 'Date' alphabetically
sorted_columns = sorted(df.columns.difference(['Date']))

# Interpolation and rounding are only meaningful for numeric columns;
# running interpolate() on text columns can raise.
numeric_columns = df[sorted_columns].select_dtypes(include='number').columns.tolist()

# Apply column-wise interpolation (numeric columns only)
if numeric_columns:
    df[numeric_columns] = df[numeric_columns].interpolate(method='polynomial', order=3)

# Apply forward-fill (ffill) and backward-fill (bfill) to handle missing values at boundaries
df[sorted_columns] = df[sorted_columns].ffill().bfill()

# Round all numeric columns to 4 decimal places
if numeric_columns:
    df[numeric_columns] = df[numeric_columns].round(4)

# Recreate the DataFrame with 'Date' as the first column, followed by the sorted columns
df = df[['Date'] + sorted_columns]

# Save the updated DataFrame back over the input CSV file
df.to_csv('path_to_your_csv_file/Converted_Exchange_Rate.csv', index=False)  # Update with the correct path

# The date conversion above is commented out, so report what was actually done
print("Columns sorted, missing values filled, and file saved successfully!")

## **Merge**

In [None]:
import pandas as pd

# Read both inputs in order: the 2023-2024 merge first, then the converted file.
# NOTE: these are absolute paths on one specific machine; adjust before running elsewhere.
df1, df2 = (
    pd.read_csv(source_path)
    for source_path in (
        '/Users/shravyadsouza/PycharmProjects/NT/csv/Merged_Exchange_Rate_2023_2024.csv',
        '/Users/shravyadsouza/PycharmProjects/NT/Converted_Exchange_Rate.csv',
    )
)

# Stack the rows of the two frames and renumber the index from zero
df_merged = pd.concat((df1, df2), ignore_index=True)

# Write the combined rows to a new CSV without the index column
df_merged.to_csv(
    '/Users/shravyadsouza/PycharmProjects/NT/Exchange_Rate_2013_24.csv',
    index=False,
)

print("CSV files have been successfully merged and saved.")

## **Sort According To Date**

In [None]:
import pandas as pd

# Load the first CSV file
df1 = pd.read_csv('path_to_your_csv_folder/Merged_Exchange_Rate_2023_2024.csv')  # Update with the correct path

# Load the second CSV file
df2 = pd.read_csv('path_to_your_csv_folder/Converted_Exchange_Rate.csv')  # Update with the correct path

# Append (concatenate) the two DataFrames row-wise
df_merged = pd.concat([df1, df2], ignore_index=True)

# Save the merged DataFrame to a new CSV file
df_merged.to_csv('path_to_your_csv_folder/Exchange_Rate_2013_24.csv', index=False)  # Update with the correct path

print("CSV files have been successfully merged and saved.")

# Sort by date, newest first. The key parses the dates only for ordering, so the
# stored 'Date' values are written back unchanged; rows whose date cannot be
# parsed are placed at the end rather than dropped.
# NOTE(review): assumes both inputs expose the dates in a 'Date' column - confirm for df1.
df_sorted = df_merged.sort_values(
    'Date',
    ascending=False,
    kind='stable',
    na_position='last',
    key=lambda dates: pd.to_datetime(dates, errors='coerce'),
).reset_index(drop=True)

# The "Filling Empty Values" cell loads Exchange_Rate_Sorted_Descending.csv, which no
# other cell writes; point both cells at the same folder when filling in the paths.
df_sorted.to_csv('path_to_your_csv_folder/Exchange_Rate_Sorted_Descending.csv', index=False)  # Update with the correct path

print("Merged data has been sorted by date (descending) and saved.")

## **Filling Empty Values in the Final Dataset**

In [None]:
import pandas as pd

# Input and output locations for this step
file_path = "path_to_your_csv_file/Exchange_Rate_Sorted_Descending.csv"  # Update with the correct path
output_file = "path_to_your_csv_file/Exchange_Rate13-24.csv"  # Update with the correct path

# Read the dataset to be completed
df = pd.read_csv(file_path)

# Per-column median of the numeric columns; used as the replacement for missing cells
column_medians = df.median(numeric_only=True)

# Substitute the medians for missing values, then round to 4 decimal places
df_filled = df.fillna(value=column_medians).round(decimals=4)

# Persist the completed table without the index column
df_filled.to_csv(output_file, index=False)

print(f"Missing values have been filled with median values and saved to {output_file}")