In [11]:
import pandas as pd

# Source table: one row per record, with one column per calendar month.
file_path = 'AHCCD_CONFINED_filtered.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

month_columns = [
    'Jan', 'Feb', 'Mar',
    'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep',
    'Oct', 'Nov', 'Dec',
]

# Per-row tally of months that have no value.
data['Blank_Months_Count'] = data[month_columns].isnull().sum(axis=1)

# Keep only rows with at most two missing months; renumber the index from 0.
filtered_data = data.loc[data['Blank_Months_Count'].lt(3)].reset_index(drop=True)

def fill_missing_values_by_station_and_type(df, month_columns):
    """Estimate missing monthly values from the preceding row of the same series.

    Rows are sorted by ``Station``, ``Value Type`` and ``Year``. Within each
    (``Station``, ``Value Type``) group, every row except the first is paired
    with the row directly before it, and each missing month is estimated as::

        previous row's value for that month
            * (mean of current row / mean of previous row)

    Each mean is taken over that row's own non-missing ``month_columns``.

    A month stays missing when no estimate can be formed:

    * the row is the first one of its group,
    * the previous row is missing that same month,
    * either row mean is undefined (all months missing),
    * the previous row mean is exactly 0, so the ratio is undefined.
      (Such months used to be written as 0, which fabricated a value.)

    Estimates are always based on the values present in the input; a value
    filled in for one row is never reused to fill the following row.

    NOTE(review): the two means are taken over different month sets whenever
    the current row has gaps, and the preceding row is not required to be
    ``Year - 1``. Both look like they could bias the estimate -- confirm
    whether that is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Station``, ``Value Type``, ``Year`` and every column
        named in ``month_columns``. It is not modified.
    month_columns : list of str
        Names of the monthly value columns to fill.

    Returns
    -------
    pandas.DataFrame
        A sorted copy of ``df`` with a fresh 0..n-1 index and the estimable
        gaps filled.
    """
    month_columns = list(month_columns)
    df = df.sort_values(by=['Station', 'Value Type', 'Year']).reset_index(drop=True)
    if df.empty or not month_columns:
        return df

    months = df[month_columns]
    row_mean = months.mean(axis=1, skipna=True)

    # Line every row up with the row directly before it in the same group.
    # The first row of each group gets NaN and is therefore never filled.
    group_keys = [df['Station'], df['Value Type']]
    previous_months = months.groupby(group_keys).shift(1)
    previous_mean = row_mean.groupby(group_keys).shift(1)

    # A zero previous mean makes the ratio undefined: mask it to NaN so the
    # gap is left alone instead of being filled with a made-up 0.
    scale = row_mean / previous_mean.where(previous_mean != 0)

    # NaN in either factor propagates, so only fully defined estimates are used.
    estimates = previous_months.mul(scale, axis=0)
    df[month_columns] = months.fillna(estimates)

    return df

# Run the gap-filling pass over the pre-filtered rows and persist the result.
corrected_file_path = 'AHCCD_CONFINED_filled_corrected.csv'

filled_data_corrected = fill_missing_values_by_station_and_type(
    df=filtered_data,
    month_columns=month_columns,
)
filled_data_corrected.to_csv(path_or_buf=corrected_file_path, index=False)

print(f"Filled dataset saved to: {corrected_file_path}")


Filled dataset saved to: AHCCD_CONFINED_filled_corrected.csv


In [16]:
import pandas as pd

# NOTE: this cell reads the file that the seasonal-means cell further down
# writes, and then overwrites that same file in place. On a fresh
# "Run All" in file order the input may not exist yet or may be stale.
file_path = 'AHCCD_CONFINED_with_seasonal_filled.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# The twelve calendar months plus the derived Winter column.
month_columns = [
    'Jan', 'Feb', 'Mar',
    'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep',
    'Oct', 'Nov', 'Dec',
    'Winter',
]

# Keep only the rows in which every one of those columns has a value.
cleaned_data = data.loc[data[month_columns].notna().all(axis=1)]

cleaned_file_path = 'AHCCD_CONFINED_with_seasonal_filled.csv'
cleaned_data.to_csv(path_or_buf=cleaned_file_path, index=False)

print(f"Cleaned dataset saved to: {cleaned_file_path}")


Cleaned dataset saved to: AHCCD_CONFINED_with_seasonal_filled.csv


In [13]:
import pandas as pd

# Input: the gap-filled table written by the fill cell.
file_path = 'AHCCD_CONFINED_filled_corrected.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Calendar months, one quarter per line.
month_columns = [
    'Jan', 'Feb', 'Mar',
    'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep',
    'Oct', 'Nov', 'Dec',
]

def calculate_seasonal_means(df, month_columns=None):
    """Fill missing ``Annual`` and seasonal columns from the monthly columns.

    Only cells that are currently missing are written; existing values are
    kept as they are.

    * ``Annual`` - mean of ``month_columns``.
    * ``Spring`` / ``Summer`` / ``Autumn`` - mean of Mar-May / Jun-Aug /
      Sep-Nov.
    * ``Winter`` - mean of this row's ``Jan`` and ``Feb`` and the ``Dec`` of
      the row for the previous year (``Year - 1``) of the same series. If no
      such row exists, ``Winter`` is left missing.

    All means skip missing inputs.

    A "series" is identified by ``Station`` and, when the column is present,
    ``Value Type``. Matching on ``Station`` alone could pick up the December
    of a different value type recorded for the same station and year. When
    several rows match, the first one in frame order is used.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Station``, ``Year``, the monthly columns and the
        ``Annual``, ``Spring``, ``Summer``, ``Autumn`` and ``Winter`` columns.
        **Modified in place.**
    month_columns : list of str, optional
        Columns averaged for ``Annual``. Defaults to the twelve calendar
        month names ``'Jan'`` .. ``'Dec'``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    if month_columns is None:
        month_columns = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    else:
        month_columns = list(month_columns)

    if df.empty:
        return df

    season_sources = {
        'Annual': month_columns,
        'Spring': ['Mar', 'Apr', 'May'],
        'Summer': ['Jun', 'Jul', 'Aug'],
        'Autumn': ['Sep', 'Oct', 'Nov'],
    }
    for target, source_columns in season_sources.items():
        # fillna only touches the cells that are still missing.
        row_means = df[source_columns].mean(axis=1, skipna=True)
        df[target] = df[target].fillna(row_means)

    series_keys = ['Station', 'Value Type'] if 'Value Type' in df.columns else ['Station']

    # One pass to index December by (series..., year); setdefault keeps the
    # first row seen for a key.
    december_by_series_year = {}
    lookup_rows = df[series_keys + ['Year', 'Dec']].itertuples(index=False, name=None)
    for *series_year, december in lookup_rows:
        december_by_series_year.setdefault(tuple(series_year), december)

    pending = df.loc[df['Winter'].isna(), series_keys + ['Year', 'Jan', 'Feb']]
    for index, *series, year, january, february in pending.itertuples(index=True, name=None):
        previous_key = (*series, year - 1)
        if previous_key not in december_by_series_year:
            continue  # no previous-year row: Winter stays missing
        winter_values = [january, february, december_by_series_year[previous_key]]
        df.at[index, 'Winter'] = pd.Series(winter_values).mean(skipna=True)

    return df

# Fill the missing Annual / seasonal columns, then write the table to disk.
output_file_path = 'AHCCD_CONFINED_with_seasonal_filled.csv'

filled_data = calculate_seasonal_means(df=data)
filled_data.to_csv(path_or_buf=output_file_path, index=False)

print(f"Updated dataset with seasonal and annual values saved to: {output_file_path}")


Updated dataset with seasonal and annual values saved to: AHCCD_CONFINED_with_seasonal_filled.csv
