In [2]:
import pandas as pd

# Define the folder path and base file names
folder_path = "C:\\Users\\rahla\\Desktop\\dataset\\"
op_folder_path = "C:\\Users\\rahla\\Desktop\\Sikkim-Rainfall-Prediction\\Transformed_Dataset Individual Years\\"
input_base_name = "SI"  # Input file base name
output_base_name = "T_SI_"  # Output file base name

# Latitude/longitude attached to each location column.
# Built once, outside the loop, because it is identical for every year.
location_mapping = {
    'Loc_0': {'Latitude': 27.25, 'Longitude': 88.25},
    'Loc_1': {'Latitude': 27.25, 'Longitude': 88.5},
    'Loc_2': {'Latitude': 27.25, 'Longitude': 88.75},
    'Loc_3': {'Latitude': 27.5,  'Longitude': 88.25},
    'Loc_4': {'Latitude': 27.5,  'Longitude': 88.5},
    'Loc_5': {'Latitude': 27.5,  'Longitude': 88.75},
    'Loc_6': {'Latitude': 27.75, 'Longitude': 88.25},
    'Loc_7': {'Latitude': 27.75, 'Longitude': 88.5},
    'Loc_8': {'Latitude': 27.75, 'Longitude': 88.75},
    'Loc_9': {'Latitude': 28.0,  'Longitude': 88.5},
    'Loc_10': {'Latitude': 28.0,  'Longitude': 88.75},
}

# Flat lookup tables so Series.map can take a dict directly
# instead of calling a Python lambda for every row.
latitude_by_location = {loc: coords['Latitude'] for loc, coords in location_mapping.items()}
longitude_by_location = {loc: coords['Longitude'] for loc, coords in location_mapping.items()}


def transform_rainfall_frame(df):
    """Reshape one year's wide rainfall table into long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Date', 'Day' and every key of
        ``location_mapping`` ('Loc_0' ... 'Loc_10').

    Returns
    -------
    pandas.DataFrame
        One row per (input row, location) with the columns
        'Date', 'Day', 'Year', 'Month', 'Location', 'Rainfall (in mm)',
        'Latitude', 'Longitude', 'Quarter'. The input frame is not modified.

    Raises
    ------
    KeyError
        If a required column is missing from ``df``.

    Notes
    -----
    Dates are parsed with ``errors='coerce'``, so a Date that does not parse
    yields NaN in 'Year', 'Month' and 'Quarter' rather than raising.
    """
    # Parse the Date column once and reuse it for both derived columns.
    parsed_dates = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors='coerce')
    dated_df = df.assign(Year=parsed_dates.dt.year, Month=parsed_dates.dt.month)

    # Melt the location columns into 'Location' and 'Rainfall (in mm)' columns.
    melted_df = pd.melt(
        dated_df,
        id_vars=['Date', 'Day', 'Year', 'Month'],  # Columns to keep
        value_vars=list(location_mapping),  # Columns to melt (Loc_0 ... Loc_10, in mapping order)
        var_name='Location',  # New column for location names
        value_name='Rainfall (in mm)'  # New column for location values
    )

    # Add Latitude and Longitude columns based on the mapping.
    melted_df['Latitude'] = melted_df['Location'].map(latitude_by_location)
    melted_df['Longitude'] = melted_df['Location'].map(longitude_by_location)

    # Quarter of the year: months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
    melted_df['Quarter'] = (melted_df['Month'] - 1) // 3 + 1

    return melted_df


# Loop through the years 1938 to 1977 (the end of range() is exclusive)
for year in range(1938, 1978):
    input_file_path = f"{folder_path}{input_base_name}{year}.xlsx"  # Construct the input file path
    output_file_path = f"{op_folder_path}{output_base_name}{year}.xlsx"  # Construct the output file path

    # Best-effort batch: a failure on one year is reported and the loop
    # carries on with the next year instead of aborting the whole run.
    try:
        # Load the dataset
        df = pd.read_excel(input_file_path)

        # Reshape to long format and add the derived columns
        melted_df = transform_rainfall_frame(df)

        # Save the final DataFrame to the output file
        melted_df.to_excel(output_file_path, index=False)

        print(f"All changes applied and saved to: {output_file_path}")

    except Exception as e:
        print(f"Error processing file {input_file_path}: {e}")


All changes applied and saved to: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1938.xlsx
All changes applied and saved to: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1939.xlsx
All changes applied and saved to: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1940.xlsx
All changes applied and saved to: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1941.xlsx
All changes applied and saved to: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1942.xlsx
All changes applied and saved to: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1943.xlsx
All changes applied and saved to: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1944.xlsx
All changes applied and saved to: C:\Users\rahla

In [4]:
#remove empty rows from all the datasets

import pandas as pd

# Folder holding the per-year transformed workbooks, and their shared file-name prefix
folder_path = "C:\\Users\\rahla\\Desktop\\Sikkim-Rainfall-Prediction\\Transformed_Dataset Individual Years\\"
base_file_name = "T_SI_"

# Visit every yearly workbook from 1938 through 1977 (the end of range() is exclusive)
for year in range(1938, 1978):
    # Full path of this year's workbook
    file_path = folder_path + base_file_name + str(year) + ".xlsx"
    try:
        # Read the workbook into memory
        df = pd.read_excel(file_path)

        # Keep only the rows that actually carry a Date value
        has_date = df['Date'].notna()
        df = df.loc[has_date]

        # Overwrite the same workbook with the filtered rows
        df.to_excel(file_path, index=False)
        print(f"Processed and saved: {file_path}")
    except Exception as e:
        # Report the failure and continue with the next year
        print(f"Error processing file {file_path}: {e}")


Processed and saved: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1938.xlsx
Processed and saved: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1939.xlsx
Processed and saved: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1940.xlsx
Processed and saved: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1941.xlsx
Processed and saved: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1942.xlsx
Processed and saved: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1943.xlsx
Processed and saved: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1944.xlsx
Processed and saved: C:\Users\rahla\Desktop\Sikkim-Rainfall-Prediction\Transformed_Dataset Individual Years\T_SI_1945.xlsx
Processed and sa