In [102]:
import pandas as pd
import numpy as np
import os
import warnings
import math
import scipy.interpolate
import matplotlib.pyplot as plt
import seaborn as sns
import csv
from tqdm import tqdm

In [103]:
# Locations of the two input files, both inside the "data" folder of the working directory:
# the combined weather data first, then the coordinates of all the towns.
weather_path, town_coord_path = (
    os.path.join(os.getcwd(), "data", file_name)
    for file_name in ('weather_20190226.csv', 'FieldSiteLocations.csv')
)

In [104]:
# Load the combined weather table; the first line of the file is the header row.
# NOTE(review): parse_dates=[0] targets the first CSV column. The previews further down show
# DATE as plain 'm/d/Y' text, so DATE does not appear to be the column being parsed here —
# confirm which column was intended. Later cells parse DATE with '%m/%d/%Y', i.e. they rely
# on DATE staying text.
weather = pd.read_csv(weather_path, header = 0, parse_dates=[0])
#weather['LOCATION_TOWN'] = weather['LOCATION_TOWN'].str.upper()

In [105]:
# Load the town coordinates; the first line of the file is the header row.
# NOTE(review): parse_dates=[0] points at the first column, which is renamed to 'City' below
# and holds town names rather than dates — presumably a copy/paste leftover; confirm.
town_coord = pd.read_csv(town_coord_path, header = 0, parse_dates=[0])
# Fixed column names. 'Longitute-trimmed' (sic) is the exact spelling nearest_stat() looks up,
# so the typo must be changed in both places or not at all.
town_coord.columns = ['City', 'Longitute-trimmed', 'Latitude-trimmed']
# town names are normalised to upper case
town_coord['City'] = town_coord['City'].str.upper()

In [106]:
weather.tail(20)

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,AWND,PRCP,TMAX,TMIN
242436,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/16/2017,,0.0,,
242437,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/17/2017,,0.0,,
242438,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/18/2017,,0.0,,
242439,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/19/2017,,0.0,,
242440,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/20/2017,,0.0,,
242441,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/21/2017,,0.0,,
242442,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/22/2017,,0.0,,
242443,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/23/2017,,0.0,,
242444,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/24/2017,,0.0,,
242445,USC00115272,"MACKINAW 1 N, IL US",40.55165,-89.334,214.0,12/25/2017,,2.0,,


In [107]:
town_coord.head()

Unnamed: 0,City,Longitute-trimmed,Latitude-trimmed
0,F_BELLEVILLE,-89.984,38.513
1,F_BETHANY,-88.741,39.645
2,F_CLAYTON,-90.953,40.028
3,F_DELAVAN,-89.547,40.372
4,F_DU QUOIN,-89.243,38.011


In [108]:
# One row per distinct (station, latitude, longitude) combination in the weather table:
# rows with a missing key are left out and the result is ordered by station id, then
# latitude, then longitude, with a fresh 0..n-1 index.
station_coord = (
    weather[['STATION', 'LATITUDE', 'LONGITUDE']]
    .dropna()
    .drop_duplicates()
    .sort_values(by=['STATION', 'LATITUDE', 'LONGITUDE'])
    .reset_index(drop=True)
)
station_coord.head()

Unnamed: 0,STATION,LATITUDE,LONGITUDE
0,US1ILPT0002,40.0406,-88.7039
1,US1ILWD0008,41.8876,-89.8136
2,US1KSMP0014,38.3982,-97.5207
3,US1KYMM0003,38.0655,-83.9881
4,USC00110137,38.86702,-90.14886


Calculating the great-circle distance between two places with the haversine formula:

<img src="image/distance_calculate.png" align="left"/>

In [109]:
# Radius of the Earth in kilometres used to scale the central angle into a distance
# (6378.137 km is the equatorial radius). The misspelt name is kept so that any other cell
# referring to it keeps working.
EARTH_REDIUS = 6378.137

def rad(d):
    """Convert an angle `d` from degrees to radians."""
    return d * math.pi / 180.0

def getDistance(lat1, lon1, lat2, lon2):
    """
    Great-circle distance between two places, computed with the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float
        latitude and longitude of the first place, in degrees
    lat2, lon2 : float
        latitude and longitude of the second place, in degrees

    Returns
    -------
    float
        distance in kilometres (NaN if any coordinate is NaN)
    """
    radLat1 = rad(lat1)
    radLat2 = rad(lat2)
    half_lat_diff = (radLat1 - radLat2) / 2
    half_lon_diff = (rad(lon1) - rad(lon2)) / 2

    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2)
    haversine = (math.pow(math.sin(half_lat_diff), 2) +
                 math.cos(radLat1) * math.cos(radLat2) * math.pow(math.sin(half_lon_diff), 2))

    # Mathematically the term never exceeds 1, but floating-point rounding can push it a hair
    # above 1 for (nearly) antipodal points, which would make asin() raise ValueError.
    # An explicit comparison (rather than min()) lets NaN inputs propagate as NaN.
    if haversine > 1.0:
        haversine = 1.0

    s = 2 * math.asin(math.sqrt(haversine))
    return s * EARTH_REDIUS

Reference value used to validate `getDistance`: the straight-line distance between Belleville and the Mount Sterling station (USC00115935) is 176.14 kilometers.

<img src="image/distance_validation.png" align="left" width="200" height="100"/>

In [110]:
# straight-line distance between Belleville and Mount Sterling Station based on getDistance function
print(getDistance(39.9841, -90.7525, 38.52, -89.98),'kilometers')

176.0598743614326 kilometers


In [111]:
def nearest_stat(num_nearest):
    """
    Find the `num_nearest` closest weather stations of every town.

    Reads the module-level tables `town_coord` (columns 'City', 'Latitude-trimmed',
    'Longitute-trimmed') and `station_coord` (columns 'STATION', 'LATITUDE', 'LONGITUDE')
    and measures distances with getDistance().

    Parameters
    ----------
    num_nearest : int
        number of stations to keep for each town

    Returns
    -------
    dict
        town name -> list of (station id, distance in kilometres) tuples, nearest first
    """
    # Read the station columns once. Iterating the columns directly (instead of
    # `.iloc[index_label]` per row) does not depend on the frames having a default 0..n-1
    # index and avoids building a row object for every town/station pair.
    stations = list(zip(station_coord['STATION'],
                        station_coord['LATITUDE'],
                        station_coord['LONGITUDE']))
    towns = zip(town_coord['City'],
                town_coord['Latitude-trimmed'],
                town_coord['Longitute-trimmed'])

    # a dictionary containing the nearest stations' IDs and distances of each town
    nearest_stations = {}
    for town_name, town_lat, town_lon in towns:
        # distance from this town to every station; a station listed under more than one
        # coordinate pair keeps the distance of its last entry (same as a plain dict update)
        all_stations = {
            station_id: getDistance(town_lat, town_lon, station_lat, station_lon)
            for station_id, station_lat, station_lon in stations
        }
        # sort the stations by distance and keep the closest ones
        ranked = sorted(all_stations.items(), key=lambda d: d[1])
        nearest_stations[town_name] = ranked[:num_nearest]
    return nearest_stations
    
# precompute the 7 nearest stations of every town; the result is a dictionary keyed by town name
nearest_stations = nearest_stat(7)
#nearest_stations['BELLEVILLE']

In [112]:
nearest_stations['F_BELLEVILLE']

[('USW00013802', 12.49070191509044),
 ('USC00237452', 28.093192044681526),
 ('USC00112679', 33.03060160203802),
 ('USC00110137', 41.93192209565437),
 ('USC00116011', 57.71546612824573),
 ('USC00113693', 63.74815870243718),
 ('USC00116642', 85.4219683075872)]

In [113]:
def get_flag(param, station_id,flag_index = 2, data = None):
    """
    Get one flag of a parameter's attribute string for every row of a station.

    The attribute column is named `param + '_ATTRIBUTES'` and holds comma-separated fields.
    E.g. ',L,7,1700' for 'PRCP_ATTRIBUTES' indicates that the value should be removed
    because the 2nd field is populated.

    Parameters
    ----------
    param : str
        parameter name, e.g. 'PRCP'
    station_id : str
        value of the 'STATION' column selecting the rows to inspect
    flag_index : int
        1-based position of the wanted field inside the attribute string
    data : pandas.DataFrame, optional
        table to read from; defaults to the module-level `weather` table

    Returns
    -------
    pandas.Series
        one string per row of the station (same index as those rows); '' means the flag is
        not populated. A column of '' is returned when the parameter has no attribute column,
        when that column holds no strings, or when no row has that many fields.
    """
    table = weather if data is None else data
    station_rows = table[table['STATION'] == station_id]
    no_flags = pd.Series('', index = station_rows.index)

    attribute_column = param + '_ATTRIBUTES'
    # the parameter does not have a corresponding attribute column
    if attribute_column not in station_rows.columns:
        return no_flags

    try:
        # split the attribute column by comma, one field per result column
        param_flags = station_rows[attribute_column].str.split(',', expand = True)
    except AttributeError:
        # the column holds no strings at all (e.g. it is entirely NaN), so `.str` is unavailable
        return no_flags

    # no attribute string is long enough to contain the requested field
    if (flag_index - 1) not in param_flags.columns:
        return no_flags

    # Rows with a missing or shorter attribute string come back as NaN/None; report them as
    # '' so that a caller testing `flag != ''` does not mistake "no information" for "flagged".
    return param_flags[flag_index - 1].fillna('')

In [114]:
sum(get_flag('TMAX', 'USW00013802') != '')

0

Calculation of interpolated values is based on the formula:

<img src="image/idw-formula.png" align="left"/>

In [115]:
def IDW_interpolation(town, param, power,num_nearest, drop_invalid = False):
    """
    Estimate a weather parameter for one town by inverse distance weighting (IDW).

    The `num_nearest` stations closest to `town` (as returned by nearest_stat) are merged on
    DATE and every row is combined as

        sum(value_i / distance_i ** power) / sum(1 / distance_i ** power)

    taken over the stations that report a value in that row.

    Parameters
    ----------
    town : str
        town name; must be a key of the dictionary returned by nearest_stat
    param : str
        column of the module-level `weather` table to interpolate, e.g. 'PRCP'
    power : number
        exponent applied to the distances (fractional powers such as 0.5 are honoured)
    num_nearest : int
        number of nearest stations to use
    drop_invalid : bool
        when True, values whose flag returned by get_flag() is populated are set to NaN
        before interpolating

    Returns
    -------
    pandas.DataFrame
        columns 'DATE', param + '0' ... (one per station, nearest first) and
        'Interpolated ' + param. The interpolated value is NaN when no station has a value
        in the row.
    """
    neighbours = nearest_stat(num_nearest)[town]
    station_id_list = [station_id for station_id, _ in neighbours]
    distances = np.array([distance for _, distance in neighbours], dtype = float)
    value_columns = [param + str(i) for i in range(len(station_id_list))]

    def station_frame(station_id, column_name):
        """DATE + value column of one station, the value column already carrying its final name."""
        rows = weather.loc[weather['STATION'] == station_id, ['DATE', param]].copy()
        if drop_invalid:
            flags = get_flag(param, station_id)
            # a missing flag means "nothing reported", not "flagged"
            rows.loc[flags.fillna('') != '', param] = np.nan
        # Naming the column before merging keeps every merge free of name clashes; merging
        # several frames that all call their value column `param` makes pandas generate
        # duplicate '_x'/'_y' names, which recent pandas versions reject with a MergeError.
        return rows.rename(columns = {param: column_name})

    combined_df = station_frame(station_id_list[0], value_columns[0])
    for station_id, column_name in zip(station_id_list[1:], value_columns[1:]):
        combined_df = pd.merge(combined_df, station_frame(station_id, column_name),
                               on = 'DATE', how = 'outer')
    combined_df = combined_df.drop_duplicates(keep = 'first').reset_index(drop = True)

    values = combined_df[value_columns].to_numpy(dtype = float)
    # a value of NaN is left out of both sums
    has_value = ~np.isnan(values)

    # float(), not int(): int() would silently turn power=0.5 into 0, i.e. a plain average
    with np.errstate(divide = 'ignore'):
        weights = 1.0 / distances ** float(power)
    # a station at distance 0 has an infinite weight; it is handled separately below
    exact_hit = np.isinf(weights)
    finite_weights = np.where(exact_hit, 0.0, weights)

    weighted_sum = np.where(has_value, values, 0.0) @ finite_weights
    weight_total = has_value.astype(float) @ finite_weights
    # rows in which no station reports a value give 0/0, i.e. NaN
    with np.errstate(divide = 'ignore', invalid = 'ignore'):
        interpolated = weighted_sum / weight_total

    # a station located exactly at the town determines the value whenever it reports one
    for station_pos in np.flatnonzero(exact_hit):
        reported = has_value[:, station_pos]
        interpolated[reported] = values[reported, station_pos]

    # add a column of interpolated values to the dataframe
    combined_df['Interpolated ' + param] = interpolated

    return combined_df

In [116]:
#CSV for Thomasboro
def write_interpolated_values(param,num_nearest, power, target_town = 'THOMASBORO'):
    """
    Write the interpolated values of a certain parameter for one town into a csv.

    The file is created in the working directory and named
    '<target_town>_<param>(p<power>n<num_nearest>).csv'. Its first row is the header
    ('CITY' followed by the columns returned by IDW_interpolation); every following row is
    the town name followed by one row of that table.

    NOTE: a later cell of this notebook defines another function with this same name, which
    replaces this one as soon as that cell has been run.

    Parameters
    ----------
    param : str
        weather parameter to interpolate, e.g. 'PRCP'
    num_nearest : int
        number of nearest stations used by the interpolation
    power : number
        distance exponent used by the interpolation
    target_town : str
        bare town name to export (default 'THOMASBORO')
    """
    # Town names carry prefixes/suffixes such as 'F_THOMASBORO' or 'U_BELLEVILLE_17', so an
    # exact comparison with the bare name never matches; compare with the '_'-separated
    # parts of each name instead.
    selected_towns = [town for town in town_coord['City']
                      if target_town in str(town).split('_')]
    if not selected_towns:
        warnings.warn("no town matching " + repr(target_town) + " was found; only the header row is written")

    file_name = target_town + "_" + param + "(p" + str(power) + 'n' + str(num_nearest) +").csv"
    # newline='' is what the csv module expects; without it every row is followed by a blank
    # line on platforms that translate '\n' to '\r\n'
    with open(file_name, "w", newline = '') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')

        header_written = False
        for town in tqdm(selected_towns):
            combined_df = IDW_interpolation(town, param, power, num_nearest)
            if not header_written:
                writer.writerow(['CITY'] + list(combined_df.columns))
                header_written = True
            for index, row in combined_df.iterrows():
                writer.writerow([town] + [i for i in row])

        if not header_written:
            # nothing matched: still emit the header row, taken from the first town's table
            first_town = town_coord['City'].iloc[0]
            writer.writerow(['CITY'] + list(IDW_interpolation(first_town, param, power, num_nearest).columns))

In [117]:
# nearest_stations_list = [1,2,3,5,7,9]

# for x in nearest_stations_list:
#     write_interpolated_values('PRCP',x,0.5)

# for x in nearest_stations_list:
#     write_interpolated_values('PRCP',x,1)

# for x in nearest_stations_list:
#     write_interpolated_values('PRCP',x,2)

# for x in nearest_stations_list:
#     write_interpolated_values('PRCP',x,3)

In [118]:
# not_dopped.equals(dropped)

In [119]:
# %%time
# IDW_interpolation('BELLEVILLE', 'PRCP', 2,2).head(10)

In [120]:
# %%time
# IDW_interpolation('BELLEVILLE', 'PRCP', 3).head(10)

In [121]:
# def lineplot(combined_df):
#     """
#     visualize the interpolated values for a certain parameter into a line plot
#     """
#     df = combined_df[['DATE', combined_df.columns[-1]]].set_index('DATE')
#     df.index = pd.to_datetime(df.index)
#     sns.set_context({"figure.figsize": (15, 8)})
#     sns.set_style('darkgrid')
#     ax = df.plot()
#     plt.title('Line Plot of Interpolated Values')
#     plt.xticks(rotation = 45)
#     plt.show()

In [122]:
# def boxplot(combined_df):
#     """
#     visualize the interpolated values for a certain parameter into a box plot
#     """
#     combined_df = combined_df.set_index('DATE')
#     combined_df.index = pd.to_datetime(combined_df.index)
#     combined_df['Year'] = combined_df.index.year
    
#     sns.set_context({"figure.figsize": (15, 8)})
#     ax = sns.boxplot(data = combined_df, x='Year',y= combined_df.columns[-2])
#     plt.title('Box Plot of Interpolated Values')
    
#     plt.show()

In [123]:
# boxplot(IDW_interpolation('BELLEVILLE', 'TMAX', 2))

In [124]:
def write_interpolated_values(param, power = 3, *, num_nearest = 7):
    """
    Write the interpolated values of a certain parameter for every town into a csv.

    The file is created in the working directory and named '<param>(p<power>n<num_nearest>).csv'.
    Its first row is the header ('CITY' followed by the columns returned by
    IDW_interpolation); every following row is a town name followed by one row of that
    town's table.

    Parameters
    ----------
    param : str
        weather parameter to interpolate, e.g. 'PRCP'
    power : number
        distance exponent used by the interpolation
    num_nearest : int, keyword-only
        number of nearest stations used by the interpolation. It used to be read from a
        global variable and was never passed on to IDW_interpolation, so every call failed;
        it is keyword-only so that existing positional calls keep their meaning.
    """
    # newline='' is what the csv module expects; without it every row is followed by a blank
    # line on platforms that translate '\n' to '\r\n'
    with open(param + "(p" + str(power) + 'n' + str(num_nearest) +").csv","w", newline = '') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')

        header_written = False
        for town in tqdm(town_coord['City'], total = len(town_coord['City'])):
            combined_df = IDW_interpolation(town, param, power, num_nearest)
            # the header comes from the first table instead of a separate interpolation run
            if not header_written:
                writer.writerow(['CITY'] + list(combined_df.columns))
                header_written = True
            for index, row in combined_df.iterrows():
                writer.writerow([town] + [i for i in row])

In [125]:
# def write_interpolated_values_without_error_removal(param, power = 2):
#     """
#     write the interpolated values of a certain parameter into a csv
#     """
#     with open(param + "_(p" + str(power) + 'n' + str(num_nearest) +").csv","w") as csvfile:
#         writer = csv.writer(csvfile, delimiter=',')
        
#         writer.writerow(['CITY'] + list(IDW_interpolation(town_coord['City'][0], param, power).columns))

#         with tqdm (total = len(town_coord['City'])) as pbar:
#             for town in town_coord['City']:
#                 combined_df = IDW_interpolation(town, param, power, True)
#                 for index, row in combined_df.iterrows():
#                     writer.writerow([town] + [i for i in row])
#                 pbar.update()

In [126]:
# p = 3 and n = 7


def write_interpolated_values_p3_n7(param,num_nearest, power):
    """
    Write the interpolated values of a certain parameter for every town into a csv.

    The file is created in the working directory and named '<param>(p<power>n<num_nearest>).csv'.
    Its first row is the header ('CITY' followed by the columns returned by
    IDW_interpolation); every following row is a town name followed by one row of that
    town's table.

    Parameters
    ----------
    param : str
        weather parameter to interpolate, e.g. 'PRCP'
    num_nearest : int
        number of nearest stations used by the interpolation
    power : number
        distance exponent used by the interpolation
    """
    # newline='' is what the csv module expects; without it every row is followed by a blank
    # line on platforms that translate '\n' to '\r\n'
    with open(param + "(p" + str(power) + 'n' + str(num_nearest) +").csv","w", newline = '') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')

        header_written = False
        for town in tqdm(town_coord['City'], total = len(town_coord['City'])):
            combined_df = IDW_interpolation(town, param, power,num_nearest)
            # the header comes from the first table instead of a separate interpolation run
            if not header_written:
                writer.writerow(['CITY'] + list(combined_df.columns))
                header_written = True
            for index, row in combined_df.iterrows():
                writer.writerow([town] + [i for i in row])

In [127]:
# Scratch check: rebuild by hand, for one town, the merged station table that
# IDW_interpolation() assembles, using the precomputed `nearest_stations`.
town = town_coord['City'][2]
param = 'TMAX'
num = 7
num_nearest = 7
power = 3

station_id_list = [i[0] for i in nearest_stations[town]]
distance_list = [i[1] for i in nearest_stations[town]]

# Give every station's value column its final name (TMAX0, TMAX1, ...) before merging.
# Merging frames that all call the column `param` makes pandas generate duplicate '_x'/'_y'
# names from the fourth station on, which recent pandas versions reject with a MergeError.
combined_df = weather[weather['STATION'] == station_id_list[0]][['DATE', param]].rename(columns = {param: param + '0'})
for station_pos, station_id in enumerate(station_id_list[1:], start = 1):
    temp_df = weather[weather['STATION'] == station_id][['DATE', param]].rename(columns = {param: param + str(station_pos)})
    combined_df = pd.merge(combined_df, temp_df, on = 'DATE', how = 'outer')
combined_df = combined_df.drop_duplicates(keep = 'first')

In [128]:
station_id_list

['USC00115935',
 'USW00093989',
 'USC00116738',
 'USC00117551',
 'USC00110598',
 'USC00116837',
 'USC00237578']

In [129]:
weather[weather['STATION'] == 'USW00093810'][['DATE', param]]

Unnamed: 0,DATE,TMAX
98764,1/1/2011,54.0
98765,1/2/2011,33.0
98766,1/3/2011,43.0
98767,1/4/2011,42.0
98768,1/5/2011,40.0
98769,1/6/2011,40.0
98770,1/7/2011,41.0
98771,1/8/2011,31.0
98772,1/9/2011,31.0
98773,1/10/2011,33.0


In [130]:
IDW_interpolation(town_coord['City'][2], 'TMAX', 3, 7)

Unnamed: 0,DATE,TMAX0,TMAX1,TMAX2,TMAX3,TMAX4,TMAX5,TMAX6,Interpolated TMAX
0,1/1/2011,,27.0,61.0,58.0,63.0,62.0,63.0,46.083894
1,1/2/2011,,32.0,28.0,24.0,23.0,27.0,32.0,28.914637
2,1/3/2011,,40.0,33.0,30.0,33.0,32.0,33.0,35.664638
3,1/4/2011,,30.0,43.0,40.0,41.0,43.0,43.0,36.764813
4,1/5/2011,,43.0,32.0,27.0,31.0,31.0,35.0,36.206510
5,1/6/2011,,33.0,44.0,41.0,41.0,43.0,47.0,38.615927
6,1/7/2011,,32.0,34.0,31.0,33.0,32.0,,32.519308
7,1/11/2011,,25.0,30.0,29.0,29.0,28.0,29.0,27.459184
8,1/12/2011,,18.0,26.0,24.0,29.0,25.0,26.0,22.502193
9,1/13/2011,,20.0,22.0,19.0,23.0,18.0,17.0,20.471609


In [131]:
write_interpolated_values_p3_n7('PRCP',7,3)

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [02:26<00:00,  2.89s/it]


In [132]:
write_interpolated_values_p3_n7('TMAX',7,3)
#write_interpolated_values_p3_n7('AWND',7,3)

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [02:24<00:00,  3.07s/it]


In [133]:
write_interpolated_values_p3_n7('TMIN',7,3)

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [02:30<00:00,  2.85s/it]


In [134]:
import os
os.listdir()

['.ipynb_checkpoints',
 'Cropset_building_kewei.ipynb',
 'crop_model',
 'Crop_visualization_kewei.ipynb',
 'data',
 'interpolation+(5).ipynb',
 'interpolation_weather.ipynb',
 'picture',
 'PRCP(p3n7).csv',
 'report',
 'Soil check.ipynb',
 'TMAX(p3n7).csv',
 'TMIN(p3n7).csv',
 'town_csv',
 'Untitled.ipynb',
 'Untitled1.ipynb']

In [135]:
import re
# The cells below read file_list[0], [1] and [2] as the PRCP, TMAX and TMIN files, but
# os.listdir() returns names in arbitrary order: sort them so the positions are reproducible.
# Only the interpolation outputs, named '<param>(p<power>n<num_nearest>).csv', are kept, so
# that other CSVs in the folder (e.g. 'InterpolatedParameters.csv', written further down)
# do not shift the positions when the notebook is run again.
file_list = sorted(f for f in os.listdir() if re.fullmatch(r'.+\(p.+n.+\)\.csv', f))

In [136]:
df = pd.read_csv(file_list[0] )

In [137]:
df.head()

Unnamed: 0,CITY,DATE,PRCP0,PRCP1,PRCP2,PRCP3,PRCP4,PRCP5,PRCP6,Interpolated PRCP
0,F_BELLEVILLE,1/1/2011,0.0,0.19,0.15,0.34,0.91,0.48,1.05,0.041975
1,F_BELLEVILLE,1/2/2011,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
2,F_BELLEVILLE,1/3/2011,0.0,0.0,0.0,,0.0,0.0,,0.0
3,F_BELLEVILLE,1/4/2011,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
4,F_BELLEVILLE,1/5/2011,0.0,0.0,0.0,0.0,0.0,0.0,,0.0


In [138]:
df2 = pd.read_csv(file_list[1] )
df2.head()

Unnamed: 0,CITY,DATE,TMAX0,TMAX1,TMAX2,TMAX3,TMAX4,TMAX5,TMAX6,Interpolated TMAX
0,F_BELLEVILLE,1/1/2011,59.0,63.0,,64.0,64.0,,,59.475263
1,F_BELLEVILLE,1/2/2011,34.0,34.0,,34.0,31.0,,,33.972957
2,F_BELLEVILLE,1/3/2011,45.0,36.0,,,32.0,,,44.159572
3,F_BELLEVILLE,1/4/2011,42.0,47.0,,46.0,42.0,,,42.48485
4,F_BELLEVILLE,1/5/2011,41.0,39.0,,38.0,37.0,,,40.737094


In [63]:
df3 = pd.read_csv(file_list[2] )
df3.head()

Unnamed: 0,CITY,DATE,TMIN0,TMIN1,TMIN2,TMIN3,TMIN4,TMIN5,TMIN6,Interpolated TMIN
0,F_BELLEVILLE,1/1/2011,28.0,25.0,,26.0,29.0,,,27.727507
1,F_BELLEVILLE,1/2/2011,18.0,18.0,,21.0,19.0,,,18.079533
2,F_BELLEVILLE,1/3/2011,22.0,26.0,,,19.0,,,22.292492
3,F_BELLEVILLE,1/4/2011,23.0,28.0,,20.0,27.0,,,23.356365
4,F_BELLEVILLE,1/5/2011,20.0,26.0,,29.0,20.0,,,20.680546


In [66]:
df_merge = pd.merge(df,df2, "left", on=["CITY","DATE"])

In [67]:
df_merge = pd.merge(df_merge,df3, "left", on=["CITY","DATE"])

In [68]:
df.shape

(146100, 10)

In [69]:
df_merge.shape

(146100, 26)

In [72]:
wea = df_merge.loc[:,['CITY', 'DATE', 'Interpolated TMIN', 'Interpolated TMAX',
       'Interpolated PRCP']]

In [78]:
wea.tail()

Unnamed: 0,CITY,DATE,Interpolated TMIN,Interpolated TMAX,Interpolated PRCP
146095,U_ST. PETER_17,2/9/2017,14.558895,34.89429,0.003719
146096,U_ST. PETER_17,3/11/2017,27.490374,42.481815,0.0
146097,U_ST. PETER_17,3/12/2017,23.881506,35.738056,0.0
146098,U_ST. PETER_17,3/14/2017,23.401869,32.677389,0.113543
146099,U_ST. PETER_17,12/24/2017,27.868662,35.875802,0.202288


In [74]:
wea.to_csv("InterpolatedParameters.csv")

In [76]:
wea["CITY"].unique()

array(['F_BELLEVILLE', 'F_BETHANY', 'F_CLAYTON', 'F_DELAVAN',
       'F_DU QUOIN', 'F_FLORA', 'F_FORSYTH', 'F_GALVA', 'F_GENESEO',
       'F_GRAND RIDGE', 'F_GREENVILLE', 'F_MACOMB', 'F_MALTA', 'F_MAZON',
       'F_MILLEDGEVILLE', 'F_MOWEAQUA', 'F_PAXTON', 'F_PEARL CITY',
       'F_ROSSVILLE', 'F_SALEM', 'F_SHUMWAY', 'F_SUBLETTE',
       'F_THOMASBORO', 'F_TOWANDA', 'F_TUSCOLA', 'F_VANDALIA', 'F_VIRDEN',
       'F_WALNUT', 'F_WARREN', 'F_WATSEKA', 'F_WILLIAMSVILLE',
       'F_WINNEBAGO', 'F_WOODSTOCK', 'U_DWIGHT', 'U_ELKVILLE', 'U_ERIE',
       'U_FENTON', 'U_GOODFIELD', 'U_MONMOTH', 'U_MT. MORRIS',
       'U_NEW BERLIN', 'U_PERRY', 'U_URBANA', 'U_BELLEVILLE_17',
       'U_BELLEVILLE_11', 'U_DEKALB_16', 'U_DEKALB_11', 'U_DEKALB_14',
       'U_ST. PETER_11', 'U_ST. PETER_17'], dtype=object)

In [91]:
from datetime import datetime
# Year of every row: parse the whole DATE column ('m/d/Y' text) in one vectorised call
# instead of one strptime() call per row; the result is kept as int64 as before.
year = pd.to_datetime(wea["DATE"], format='%m/%d/%Y').dt.year.astype('int64')

In [92]:
year.unique()

array([2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018], dtype=int64)

In [93]:
# DATE column ('m/d/Y' text) parsed into timestamps in one vectorised call
Date = pd.to_datetime(wea["DATE"], format='%m/%d/%Y')

In [94]:
Date.sort_values()

0        2011-01-01
137334   2011-01-01
14610    2011-01-01
84738    2011-01-01
32142    2011-01-01
52596    2011-01-01
17532    2011-01-01
134412   2011-01-01
5844     2011-01-01
46752    2011-01-01
87660    2011-01-01
93504    2011-01-01
122724   2011-01-01
81816    2011-01-01
118569   2011-01-01
90582    2011-01-01
11688    2011-01-01
23376    2011-01-01
75972    2011-01-01
102270   2011-01-01
131490   2011-01-01
128568   2011-01-01
99348    2011-01-01
70128    2011-01-01
61362    2011-01-01
113958   2011-01-01
40908    2011-01-01
35064    2011-01-01
108114   2011-01-01
49674    2011-01-01
            ...    
8765     2018-12-31
144717   2018-12-31
11687    2018-12-31
14608    2018-12-31
17530    2018-12-31
20436    2018-12-31
23153    2018-12-31
26297    2018-12-31
29178    2018-12-31
32046    2018-12-31
141273   2018-12-31
35056    2018-12-31
37981    2018-12-31
140251   2018-12-31
40907    2018-12-31
43720    2018-12-31
46743    2018-12-31
49672    2018-12-31
52594    2018-12-31


In [89]:
from datetime import datetime
dt = datetime.strptime(wea.loc[:,"DATE"][0], '%m/%d/%Y')
dt.year

2011

In [101]:
wea.loc[wea["CITY"]== 'U_MONMOTH' ,"DATE"].apply(lambda x :datetime.strptime(x, '%m/%d/%Y').year).unique()

array([2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018], dtype=int64)