In [5]:
import csv
import pandas as pd
import requests
import googlemaps
from datetime import datetime
import numpy as np
from statistics import mean 

In [3]:
districts = pd.read_csv("district_list_original.csv")

In [6]:
gmaps = googlemaps.Client(key='AIzaSyA356m4BlWKTUjGBC37v1967x1SjSrYcAg')

In [7]:
# Geocode each of the 51 districts in bounding boxes
for index, row in districts.iterrows():
    components = dict()
    components['country'] = 'India'
    district_info = gmaps.geocode(row['District Name'], components=components)
    district_info = district_info[0]['geometry']['bounds']
    lat_min = district_info['southwest']['lat']
    districts.at[index, 'Lat Min'] = lat_min
    lat_max = district_info['northeast']['lat']
    districts.at[index, 'Lat Max'] = lat_max
    lng_min = district_info['northeast']['lng']
    districts.at[index, 'Long Min'] = lng_min
    lng_max = district_info['southwest']['lng']
    districts.at[index, 'Long Max'] = lng_max

In [8]:
# Determine the center of each of the 51 districts
for index, row in districts.iterrows():
    lat_center = (row['Lat Min'] + row['Lat Max']) / 2
    districts.at[index, 'Lat Center'] = lat_center
    lng_center = (row['Long Min'] + row['Long Max']) / 2
    districts.at[index, 'Long Center'] = lng_center

In [56]:
# Update csv
districts.to_csv('district_list_new.csv')

In [27]:
# Find 4 nearest readings to a given's district's center
def nearest_readings(df, lat, lng):
    # Determine the 2 closest lat values
    lat_filters = df.iloc[(df[0]-lat).abs().argsort()[:1]][0].values.tolist()
    if lat_filters[0] % 1 >= 0.5:
        lat_filter_second_nearest = lat_filters[0] - 0.5
    else: 
        lat_filter_second_nearest = lat_filters[0] + 0.5
    lat_filters.append(lat_filter_second_nearest)
    df_lat_filtered = df.loc[df[0].isin(lat_filters)]
    # Determine the 2 closest lat values
    df_lat_long_filtered = df_lat_filtered.iloc[(df_lat_filtered[1]-lng).abs().argsort()[:4]]
    return df_lat_long_filtered

In [10]:
def file_name_for_year(year):
    return 'precip.' + str(year) + '.txt'

In [30]:
# Configure output format
district_avg_precips = pd.DataFrame(columns=['District Name', 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'])

In [31]:
# Main script
for year in range(2010, 2018):
    year_data = pd.read_csv(file_name_for_year(year), delim_whitespace=True, header=None)
    for index, district in districts.iterrows():
        district_lat = district['Lat Center']
        district_lng = district['Long Center']
        district_points = nearest_readings(year_data, district_lat, district_lng)
        district_points = district_points.append({0:district_lat, 1:district_lng}, ignore_index=True)
        print(district_points)
        for month in range(2,14):
            # Determine the monthly rainfall for a given district by averaging the four nearest readings
            interpolated_precip = mean(district_points[month].values.tolist()[:-1])
            district_points.at[4, month] = interpolated_precip
        district_avg_precips = district_avg_precips.append({
            'District Name': str(district['District Name'] + ' ' + str(year)),
            'January': district_points[2][4],
            'February': district_points[3][4],
            'March': district_points[4][4],
            'April': district_points[5][4],
            'May': district_points[6][4],
            'June': district_points[7][4],
            'July': district_points[8][4],
            'August': district_points[9][4],
            'September': district_points[10][4],
            'October': district_points[11][4],
            'November': district_points[12][4],
            'December': district_points[13][4]

        }, ignore_index=True)
        

          0        1     2    3     4     5     6    7     8     9     10  \
0  16.250000  76.7500  74.4  2.6   5.7  10.3  12.5  8.3  26.5  17.9  34.3   
1  16.750000  76.7500  74.3  3.2   7.0  11.2  12.4  8.1  24.8  16.4  32.1   
2  16.250000  77.2500  82.4  1.5   9.6  12.0  11.7  7.5  21.4  13.6  29.9   
3  16.750000  77.2500  81.9  1.9  10.2  12.6  11.4  7.0  19.9  12.0  27.5   
4  16.854282  74.5841   NaN  NaN   NaN   NaN   NaN  NaN   NaN   NaN   NaN   

     11   12    13     14  
0  61.3  4.4  25.5  283.7  
1  59.6  5.7  26.4  281.2  
2  61.1  8.6  28.9  288.2  
3  59.0  9.0  29.1  281.5  
4   NaN  NaN   NaN    NaN  
          0          1     2    3     4     5     6    7     8     9     10  \
0  17.250000  77.250000  81.4  2.1  11.1  13.3  11.1  6.6  18.3  10.2  25.0   
1  17.250000  77.750000  88.0  0.0  18.5  16.2   8.4  2.5   4.6   0.0   9.9   
2  17.750000  77.750000  85.5  0.0  18.0  15.9   8.8  3.8   6.7   0.2  11.5   
3  17.750000  78.250000  74.4  0.0  14.9  12.1   9.5 

In [32]:
# Export output
district_avg_precips.to_csv('district_avg_precips.csv')
