In [1]:
import pandas as pd
import numpy as np

# 1882-1885 Nov-Feb

In [69]:
# Load the monthly Nov-Feb station readings and reduce them to
# (date, latitude, longitude, elevation, raw_prcp), where `date` is the
# first day of the reading's month written as "YYYY-MM-01".
month_mapping = {
    'november': "11",
    'december': "12",
    'january': "01",
    'february': "02"
}

raw_df = (
    pd.read_csv('/Users/Sienna/Desktop/1882_1885_nov_feb_prcp.csv')
    .drop(columns=["station", "name", "region", "country"])
    # rows without a reading carry no information for the seasonal totals
    .dropna(subset=["reading"])
    # month names -> zero-padded month numbers (unmapped values pass through)
    .assign(month=lambda frame: frame["month"].replace(month_mapping))
    .assign(date=lambda frame: [f'{yr}-{mo}-01'
                                for yr, mo in zip(frame["year"], frame["month"])])
    .rename(columns={'reading': 'raw_prcp'})
    .loc[:, ["date", "latitude", "longitude", "elevation", "raw_prcp"]]
)

In [71]:
# Load the per-station monthly mean precipitation for the Nov-Feb months and
# recode the capitalised month names as zero-padded month numbers so they can
# be compared with the month part of raw_df's "YYYY-MM-01" dates.
month_mapping = {
    'November': "11",
    'December': "12",
    'January': "01",
    'February': "02"
}

mean_df = (
    pd.read_csv('/Users/Sienna/Desktop/nov_feb_mean.csv')
    .loc[:, ["latitude", "longitude", "elevation", "month", "mean"]]
    .assign(month=lambda frame: frame['month'].replace(month_mapping))
)

In [72]:
# add the mean to the raw_df
#
# Each reading is matched to the mean of its (latitude, longitude, elevation,
# month) key with a single left join instead of filtering mean_df once per
# row.  This also avoids calling float() on a one-element Series, which is
# deprecated in recent pandas and fails with an opaque TypeError when a key
# has zero or several matches.
station_month_keys = ["latitude", "longitude", "elevation", "month"]

# Key frame in raw_df row order; the month is characters 5-6 of "YYYY-MM-01".
reading_keys = raw_df[["latitude", "longitude", "elevation"]].assign(
    month=raw_df["date"].str[5:7]
)

matched_means = reading_keys.merge(
    mean_df[station_month_keys + ["mean"]],
    on=station_month_keys,
    how="left",        # keeps every reading, in raw_df order
    indicator=True,    # lets us tell "no mean row" apart from a NaN mean value
)

# A left join only grows when a key matches several mean rows.
if len(matched_means) != len(reading_keys):
    raise ValueError("mean_df has more than one mean for some (latitude, longitude, elevation, month) key")

unmatched = matched_means[matched_means["_merge"] == "left_only"]
if not unmatched.empty:
    raise ValueError(
        "no mean found for these (latitude, longitude, elevation, month) keys:\n"
        f"{unmatched[station_month_keys].drop_duplicates().to_string(index=False)}"
    )

# Positional assignment: raw_df keeps its own (post-dropna) index.
raw_df["mean"] = matched_means["mean"].astype(float).to_numpy()

In [73]:
raw_df

Unnamed: 0,date,latitude,longitude,elevation,raw_prcp,mean
0,1882-11-01,11.70,92.70,79,250.2,226.757639
1,1882-12-01,11.70,92.70,79,30.5,149.506897
2,1882-11-01,16.80,96.20,23,27.7,61.828571
3,1882-12-01,16.80,96.20,23,2.5,6.175676
4,1882-11-01,16.50,97.60,22,45.2,40.656522
...,...,...,...,...,...,...
461,1885-02-01,-3.07,128.18,7,278.0,288.862069
462,1885-01-01,-4.53,129.88,656,229.0,241.394366
463,1885-02-01,-4.53,129.88,656,279.0,198.342466
464,1885-01-01,14.60,121.00,16,2.0,17.976531


In [74]:
# get all combinations of (lat, lon, ele)
#
# dict.fromkeys de-duplicates the station tuples in a single pass while
# keeping first-seen order, replacing the quadratic "tuple not in list" scan.
lat_lon_ele_combos = list(dict.fromkeys(
    zip(raw_df["latitude"], raw_df["longitude"], raw_df["elevation"])
))

In [75]:
# iterate through each location and calculate seasonal anomaly
#
# A season is November of year Y through February of year Y+1.  For every
# station, the four monthly readings and the four monthly means are summed and
# "seasonal_anomaly" is the seasonal total as a percentage of the seasonal
# mean (100 * seasonal_prcp / seasonal_mean, rounded to 4 decimals).  The
# season is labelled with the date of its November row.  Seasons that are
# missing a month are skipped.
columns = ["date", "latitude", "longitude", "elevation", "seasonal_prcp", "seasonal_mean", "seasonal_anomaly"]

# Months of one complete season, in chronological order.
season_months = ["11", "12", "01", "02"]

# Collected as plain dicts and turned into a DataFrame once at the end;
# appending with .loc row by row is quadratic and leaves object-dtype columns.
season_records = []

for lat, lon, ele in lat_lon_ele_combos:
    cur_df = raw_df[(raw_df["latitude"] == lat) &
                    (raw_df["longitude"] == lon) &
                    (raw_df["elevation"] == ele)].sort_values(by='date')

    # 12 rows = 3 seasons x 4 months; only reported, usable seasons are still processed
    if cur_df.shape[0] != 12:
        print(f"Data size incorrect at lat {lat}, lon {lon}, ele {ele} with size {cur_df.shape[0]}")

    seasonal_prcp = 0
    seasonal_mean = 0
    date = ""          # date of the November row that opened the current season
    months_seen = []   # months accumulated since that November row

    for index, row in cur_df.iterrows():
        month = row["date"][5:7]

        # start of the season
        if month == season_months[0]:
            seasonal_prcp = row["raw_prcp"]
            seasonal_mean = row["mean"]
            date = row["date"]
            months_seen = [month]
            continue

        # during a season
        seasonal_prcp += row["raw_prcp"]
        seasonal_mean += row["mean"]
        months_seen.append(month)

        # end of the season: February of the year after the opening November,
        # with exactly Nov, Dec, Jan, Feb seen.  Comparing the month sequence
        # (rather than only counting rows) rejects e.g. Nov, Dec, Dec, Feb.
        season_complete = (
            month == season_months[-1]
            and date
            and int(row["date"][0:4]) - int(date[0:4]) == 1
            and months_seen == season_months
        )
        if season_complete:
            # a zero seasonal mean has no meaningful percentage; record NaN instead of crashing
            if seasonal_mean:
                seasonal_anomaly = round(seasonal_prcp/seasonal_mean * 100, 4)
            else:
                seasonal_anomaly = float("nan")
            season_records.append({"date": date, "latitude": row["latitude"], "longitude": row["longitude"],
                                   "elevation": row["elevation"], "seasonal_prcp": seasonal_prcp,
                                   "seasonal_mean": seasonal_mean,
                                   "seasonal_anomaly": seasonal_anomaly})

season_df = pd.DataFrame(season_records, columns=columns)
            

Data size incorrect at lat 13.7, lon 100.5, ele 12 with size 6
Data size incorrect at lat -14.48, lon 132.27, ele 103 with size 9
Data size incorrect at lat -13.82, lon 131.83, ele 189 with size 11
Data size incorrect at lat 1.5, lon 110.3, ele 26 with size 8
Data size incorrect at lat 1.5, lon 124.9, ele 80 with size 11
Data size incorrect at lat 14.6, lon 121.0, ele 16 with size 11
Data size incorrect at lat -8.33, lon 114.65, ele 8 with size 8
Data size incorrect at lat -12.55, lon 130.7, ele 15 with size 6
Data size incorrect at lat -12.45, lon 130.83, ele 15 with size 6
Data size incorrect at lat -14.97, lon 133.33, ele 107 with size 4
Data size incorrect at lat 6.67, lon 116.85, ele 0 with size 4


In [76]:
season_df

Unnamed: 0,date,latitude,longitude,elevation,seasonal_prcp,seasonal_mean,seasonal_anomaly
0,1882-11-01,11.70,92.70,79,303.3,418.277417,72.5117
1,1883-11-01,11.70,92.70,79,741.6,418.277417,177.2986
2,1884-11-01,11.70,92.70,79,291.8,418.277417,69.7623
3,1882-11-01,16.80,96.20,23,30.2,75.275149,40.1195
4,1883-11-01,16.80,96.20,23,187.7,75.275149,249.3519
...,...,...,...,...,...,...,...
98,1883-11-01,-8.33,114.65,8,943.0,836.529330,112.7277
99,1884-11-01,-8.33,114.65,8,1014.0,836.529330,121.2151
100,1884-11-01,-12.55,130.70,15,1382.3,1197.858839,115.3976
101,1884-11-01,-12.45,130.83,15,1192.5,1178.867417,101.1564


In [78]:
# Persist the Nov-Feb seasonal table; the index is written as the first, unnamed CSV column.
season_df.to_csv(path_or_buf="/Users/Sienna/Desktop/nov_feb_seasonal_anomaly.csv")

# 1883-1885 Jun-Aug

In [79]:
# Load the monthly Jun-Aug station readings and reduce them to
# (date, latitude, longitude, elevation, raw_prcp), where `date` is the
# first day of the reading's month written as "YYYY-MM-01".
month_mapping = {
    'june': "06",
    'july': "07",
    'august': "08"
}

raw_df = (
    pd.read_csv('/Users/Sienna/Desktop/1883_1885_jun_aug_prcp.csv')
    .drop(columns=["station", "name", "region", "country"])
    # rows without a reading carry no information for the seasonal totals
    .dropna(subset=["reading"])
    # month names -> zero-padded month numbers (unmapped values pass through)
    .assign(month=lambda frame: frame["month"].replace(month_mapping))
    .assign(date=lambda frame: [f'{yr}-{mo}-01'
                                for yr, mo in zip(frame["year"], frame["month"])])
    .rename(columns={'reading': 'raw_prcp'})
    .loc[:, ["date", "latitude", "longitude", "elevation", "raw_prcp"]]
)

In [80]:
# Load the per-station monthly mean precipitation for the Jun-Aug months and
# recode the capitalised month names as zero-padded month numbers so they can
# be compared with the month part of raw_df's "YYYY-MM-01" dates.
month_mapping = {
    'June': "06",
    'July': "07",
    'August': "08"
}

mean_df = (
    pd.read_csv('/Users/Sienna/Desktop/jun_aug_mean.csv')
    .loc[:, ["latitude", "longitude", "elevation", "month", "mean"]]
    .assign(month=lambda frame: frame['month'].replace(month_mapping))
)

In [81]:
# add the mean to the raw_df
#
# Each reading is matched to the mean of its (latitude, longitude, elevation,
# month) key with a single left join instead of filtering mean_df once per
# row.  This also avoids calling float() on a one-element Series, which is
# deprecated in recent pandas and fails with an opaque TypeError when a key
# has zero or several matches.
station_month_keys = ["latitude", "longitude", "elevation", "month"]

# Key frame in raw_df row order; the month is characters 5-6 of "YYYY-MM-01".
reading_keys = raw_df[["latitude", "longitude", "elevation"]].assign(
    month=raw_df["date"].str[5:7]
)

matched_means = reading_keys.merge(
    mean_df[station_month_keys + ["mean"]],
    on=station_month_keys,
    how="left",        # keeps every reading, in raw_df order
    indicator=True,    # lets us tell "no mean row" apart from a NaN mean value
)

# A left join only grows when a key matches several mean rows.
if len(matched_means) != len(reading_keys):
    raise ValueError("mean_df has more than one mean for some (latitude, longitude, elevation, month) key")

unmatched = matched_means[matched_means["_merge"] == "left_only"]
if not unmatched.empty:
    raise ValueError(
        "no mean found for these (latitude, longitude, elevation, month) keys:\n"
        f"{unmatched[station_month_keys].drop_duplicates().to_string(index=False)}"
    )

# Positional assignment: raw_df keeps its own (post-dropna) index.
raw_df["mean"] = matched_means["mean"].astype(float).to_numpy()

In [82]:
raw_df

Unnamed: 0,date,latitude,longitude,elevation,raw_prcp,mean
0,1883-06-01,11.70,92.70,79,418.3,488.032653
1,1883-07-01,11.70,92.70,79,496.1,411.356463
2,1883-08-01,11.70,92.70,79,446.8,414.646259
3,1883-06-01,16.80,96.20,23,356.4,491.026496
4,1883-07-01,16.80,96.20,23,478.0,550.075652
...,...,...,...,...,...,...
352,1885-07-01,-4.53,129.88,656,67.0,200.055556
353,1885-08-01,-4.53,129.88,656,9.0,105.690141
354,1885-06-01,14.60,121.00,16,169.5,263.311881
355,1885-07-01,14.60,121.00,16,313.9,427.269307


In [83]:
# get all combinations of (lat, lon, ele)
#
# dict.fromkeys de-duplicates the station tuples in a single pass while
# keeping first-seen order, replacing the quadratic "tuple not in list" scan.
lat_lon_ele_combos = list(dict.fromkeys(
    zip(raw_df["latitude"], raw_df["longitude"], raw_df["elevation"])
))

In [84]:
# iterate through each location and calculate seasonal anomaly
#
# A season is June through August of the same year.  For every station, the
# three monthly readings and the three monthly means are summed and
# "seasonal_anomaly" is the seasonal total as a percentage of the seasonal
# mean (100 * seasonal_prcp / seasonal_mean, rounded to 4 decimals).  The
# season is labelled with the date of its June row.  Seasons that are missing
# a month are skipped.
columns = ["date", "latitude", "longitude", "elevation", "seasonal_prcp", "seasonal_mean", "seasonal_anomaly"]

# Months of one complete season, in chronological order.
season_months = ["06", "07", "08"]

# Collected as plain dicts and turned into a DataFrame once at the end;
# appending with .loc row by row is quadratic and leaves object-dtype columns.
season_records = []

for lat, lon, ele in lat_lon_ele_combos:
    cur_df = raw_df[(raw_df["latitude"] == lat) &
                    (raw_df["longitude"] == lon) &
                    (raw_df["elevation"] == ele)].sort_values(by='date')

    # 9 rows = 3 seasons x 3 months; only reported, usable seasons are still processed
    if cur_df.shape[0] != 9:
        print(f"Data size incorrect at lat {lat}, lon {lon}, ele {ele} with size {cur_df.shape[0]}")

    seasonal_prcp = 0
    seasonal_mean = 0
    date = ""          # date of the June row that opened the current season
    months_seen = []   # months accumulated since that June row

    for index, row in cur_df.iterrows():
        month = row["date"][5:7]

        # start of the season
        if month == season_months[0]:
            seasonal_prcp = row["raw_prcp"]
            seasonal_mean = row["mean"]
            date = row["date"]
            months_seen = [month]
            continue

        # during a season
        seasonal_prcp += row["raw_prcp"]
        seasonal_mean += row["mean"]
        months_seen.append(month)

        # end of the season: August of the same year as the opening June,
        # with exactly Jun, Jul, Aug seen.  Comparing the month sequence
        # (rather than only counting rows) rejects e.g. Jun, Aug, Aug.
        season_complete = (
            month == season_months[-1]
            and date
            and int(row["date"][0:4]) == int(date[0:4])
            and months_seen == season_months
        )
        if season_complete:
            # a zero seasonal mean has no meaningful percentage; record NaN instead of crashing
            if seasonal_mean:
                seasonal_anomaly = round(seasonal_prcp/seasonal_mean * 100, 4)
            else:
                seasonal_anomaly = float("nan")
            season_records.append({"date": date, "latitude": row["latitude"], "longitude": row["longitude"],
                                   "elevation": row["elevation"], "seasonal_prcp": seasonal_prcp,
                                   "seasonal_mean": seasonal_mean,
                                   "seasonal_anomaly": seasonal_anomaly})

season_df = pd.DataFrame(season_records, columns=columns)
            

Data size incorrect at lat -12.55, lon 130.7, ele 15 with size 4
Data size incorrect at lat -14.48, lon 132.27, ele 103 with size 6
Data size incorrect at lat -13.82, lon 131.83, ele 189 with size 6
Data size incorrect at lat -8.33, lon 114.65, ele 8 with size 8
Data size incorrect at lat -12.45, lon 130.83, ele 15 with size 6
Data size incorrect at lat 1.5, lon 110.3, ele 26 with size 6
Data size incorrect at lat 6.67, lon 116.85, ele 0 with size 3


In [85]:
season_df

Unnamed: 0,date,latitude,longitude,elevation,seasonal_prcp,seasonal_mean,seasonal_anomaly
0,1883-06-01,11.70,92.70,79,1361.2,1314.035374,103.5893
1,1884-06-01,11.70,92.70,79,1170.7,1314.035374,89.0920
2,1885-06-01,11.70,92.70,79,1175.3,1314.035374,89.4420
3,1883-06-01,16.80,96.20,23,1204.0,1557.986219,77.2792
4,1884-06-01,16.80,96.20,23,1543.6,1557.986219,99.0766
...,...,...,...,...,...,...,...
106,1884-06-01,-12.45,130.83,15,0.0,8.711905,0.0000
107,1885-06-01,-12.45,130.83,15,0.5,8.711905,5.7393
108,1884-06-01,1.50,110.30,26,609.0,639.099924,95.2903
109,1885-06-01,1.50,110.30,26,552.0,639.099924,86.3715


In [86]:
# Persist the Jun-Aug seasonal table; the index is written as the first, unnamed CSV column.
season_df.to_csv(path_or_buf="/Users/Sienna/Desktop/jun_aug_seasonal_anomaly.csv")