In [30]:
import csv
import pandas as pd
import requests
import googlemaps
from datetime import datetime
import numpy as np
from statistics import mean 

In [31]:
gmaps = googlemaps.Client(key='AIzaSyA356m4BlWKTUjGBC37v1967x1SjSrYcAg')

In [32]:
districts = pd.read_csv("district_list_original.csv")

In [33]:
# Geocode each of the 51 districts in bounding boxes
for index, row in districts.iterrows():
    components = dict()
    components['country'] = 'India'
    components['type'] = 'district'
    geocoding_string = row['District Name'] + ' district'
    district_info = gmaps.geocode(geocoding_string, components=components)
    district_info = district_info[0]['geometry']['bounds']
    lat_min = district_info['southwest']['lat']
    districts.at[index, 'Lat Min'] = lat_min
    lat_max = district_info['northeast']['lat']
    districts.at[index, 'Lat Max'] = lat_max
    lng_min = district_info['northeast']['lng']
    districts.at[index, 'Long Min'] = lng_min
    lng_max = district_info['southwest']['lng']
    districts.at[index, 'Long Max'] = lng_max

In [34]:
for index, row in districts.iterrows():
    if row['Lat Min'] > row['Lat Max']:
        lat_max_new = row['Lat Min']
        districts.at[index, 'Lat Min'] = row['Lat Max']
        districts.at[index, 'Lat Max'] = lat_max_new
    if row['Long Min'] > row['Long Max']:
        lng_max_new = row['Long Min']
        districts.at[index, 'Long Min'] = row['Long Max']
        districts.at[index, 'Long Max'] = lng_max_new

In [40]:
# Determine the center of each of the 51 districts
for index, row in districts.iterrows():
    lat_center = (row['Lat Min'] + row['Lat Max']) / 2
    districts.at[index, 'Lat Center'] = lat_center
    lng_center = (row['Long Min'] + row['Long Max']) / 2
    districts.at[index, 'Long Center'] = lng_center

In [41]:
# Update csv
districts.to_csv('district_list_new.csv')

In [35]:
# Find 4 nearest readings to a given's district's center
def nearest_readings(df, lat, lng):
    # Determine the 2 closest lat values
    lat_filters = df.iloc[(df[0]-lat).abs().argsort()[:1]][0].values.tolist()
    if lat_filters[0] % 1 >= 0.5:
        lat_filter_second_nearest = lat_filters[0] - 0.5
    else: 
        lat_filter_second_nearest = lat_filters[0] + 0.5
    lat_filters.append(lat_filter_second_nearest)
    df_lat_filtered = df.loc[df[0].isin(lat_filters)]
    # Determine the 2 closest lat values
    df_lat_long_filtered = df_lat_filtered.iloc[(df_lat_filtered[1]-lng).abs().argsort()[:4]]
    return df_lat_long_filtered

In [None]:
def file_name_for_year(year):
    return 'precip.' + str(year) + '.txt'

In [44]:
# Configure output format
district_avg_precips = pd.DataFrame(columns=['District Name', 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'])
# Main script
for year in range(2010, 2018):
    year_data = pd.read_csv(file_name_for_year(year), delim_whitespace=True, header=None)
    for index, district in districts.iterrows():
        district_lat = district['Lat Center']
        district_lng = district['Long Center']
        district_points = nearest_readings(year_data, district_lat, district_lng)
        print(district_points)
        district_points = district_points.append({0:district_lat, 1:district_lng}, ignore_index=True)
        for month in range(2,14):
            # Determine the monthly rainfall for a given district by averaging the four nearest readings
            interpolated_precip = mean(district_points[month].values.tolist()[:-1])
            district_points.at[4, month] = interpolated_precip
        district_avg_precips = district_avg_precips.append({
            'District Name': str(district['District Name'] + ' ' + str(year)),
            'January': district_points[2][4],
            'February': district_points[3][4],
            'March': district_points[4][4],
            'April': district_points[5][4],
            'May': district_points[6][4],
            'June': district_points[7][4],
            'July': district_points[8][4],
            'August': district_points[9][4],
            'September': district_points[10][4],
            'October': district_points[11][4],
            'November': district_points[12][4],
            'December': district_points[13][4]

        }, ignore_index=True)
        

          0      1     2    3     4     5     6    7     8     9     10    11  \
37804  17.25  77.25  81.4  2.1  11.1  13.3  11.1  6.6  18.3  10.2  25.0  56.7   
37803  17.25  77.75  88.0  0.0  18.5  16.2   8.4  2.5   4.6   0.0   9.9  43.7   
38014  17.75  77.75  85.5  0.0  18.0  15.9   8.8  3.8   6.7   0.2  11.5  44.6   
38013  17.75  78.25  74.4  0.0  14.9  12.1   9.5  8.4  10.4   9.0  13.4  39.3   

         12    13     14  
37804   9.8  29.5  275.1  
37803  14.4  31.8  238.0  
38014  14.9  31.2  241.1  
38013  16.5  25.6  233.5  
          0      1     2    3     4     5     6    7     8     9     10    11  \
37804  17.25  77.25  81.4  2.1  11.1  13.3  11.1  6.6  18.3  10.2  25.0  56.7   
37803  17.25  77.75  88.0  0.0  18.5  16.2   8.4  2.5   4.6   0.0   9.9  43.7   
38014  17.75  77.75  85.5  0.0  18.0  15.9   8.8  3.8   6.7   0.2  11.5  44.6   
38013  17.75  78.25  74.4  0.0  14.9  12.1   9.5  8.4  10.4   9.0  13.4  39.3   

         12    13     14  
37804   9.8  29.5  275.1  

In [45]:
# Export output
district_avg_precips.to_csv('district_avg_precips.csv')
