In [None]:
!pip install imdlib

In [None]:
import imdlib as imd
import numpy as np
from scipy.spatial import distance
import pandas as pd
from tabulate import tabulate
import itertools

In [None]:
# Directory handed to imd.open_data as the location of the yearwise data files.
file_dir = "/content/drive/MyDrive/GHG Emissions Paddy Fields Project/Data"

## Rainfall data
- Units of rainfall: mm
- Cell size: 0.25° × 0.25°
- Latitude range: 6.5°N - 38.5°N
- Longitude range: 66.5°E - 100.0°E
- Grid size: 129 × 135

## Temperature data
- Units of temperature: °C
- Cell size: 1° × 1°
- Latitude range: 7.5°N - 37.5°N
- Longitude range: 67.5°E - 97.5°E
- Grid size: 31 × 31


In [None]:
# Open the 2017 yearwise files for rainfall, minimum temperature and maximum
# temperature (in that order) from file_dir.
ds_rain, ds_tmin, ds_tmax = (
    imd.open_data(variable, 2017, 2017, 'yearwise', file_dir)
    for variable in ('rain', 'tmin', 'tmax')
)

In [None]:
# Convert the loaded rainfall dataset to xarray and keep only its 'rain' variable.
rainfall = ds_rain.get_xarray()['rain']

In [None]:
# skipped_rain collects the (row, col) cells that are left out;
# rain_dfs maps a "lat-lon" key to that cell's rainfall DataFrame.
skipped_rain, rain_dfs = [], {}

In [None]:
# Start from empty containers so that re-running this cell does not append
# duplicate entries to skipped_rain or keep stale frames in rain_dfs.
skipped_rain.clear()
rain_dfs.clear()

# Pull the values out once as a plain array; slicing it per cell avoids
# going through xarray indexing for every one of the 129 x 135 grid points.
rain_values = rainfall.data

# Missing days per cell, counted along the first (time) axis. -999.0 is the
# missing-value sentinel this notebook filters on; NaN is counted as well in
# case the sentinel has already been masked before it reaches this cell.
rain_missing_days = ((rain_values == -999.0) | np.isnan(rain_values)).sum(axis=0)

for cell in itertools.product(np.arange(0, 129), np.arange(0, 135)):
  if rain_missing_days[cell] >= 180:
    # 180 or more days without data: leave this cell out.
    skipped_rain.append(cell)
  else:
    # Grid indices -> coordinates: 0.25 degree steps starting at 6.5 / 66.5.
    lat = str(6.5 + cell[0]*0.25)
    lon = str(66.5 + cell[1]*0.25)
    rain_dfs[f"{lat}-{lon}"] = pd.DataFrame(rain_values[:, cell[0], cell[1]], columns=['rainfall'])

In [None]:
# 'tmax' variable of the maximum-temperature dataset, plus fresh containers:
# a list for the skipped (row, col) cells and a dict of per-cell DataFrames
# keyed by "lat-lon".
max_temp = ds_tmax.get_xarray()['tmax']
skipped_max_temp, max_temp_dfs = [], {}

In [None]:
# Start from empty containers so that re-running this cell does not append
# duplicate entries to skipped_max_temp or keep stale frames in max_temp_dfs.
skipped_max_temp.clear()
max_temp_dfs.clear()

# Pull the values out once as a plain array; slicing it per cell avoids
# going through xarray indexing for every one of the 31 x 31 grid points.
max_temp_values = max_temp.data

# Missing days per cell, counted along the first (time) axis. Readings of
# 99.9 or above are what this notebook treats as missing; NaN is counted as
# well in case the sentinel has already been masked before it reaches here.
max_temp_missing_days = ((max_temp_values >= 99.9) | np.isnan(max_temp_values)).sum(axis=0)

for cell in itertools.product(np.arange(0, 31), np.arange(0, 31)):
  if max_temp_missing_days[cell] >= 180:
    # 180 or more days without data: leave this cell out.
    skipped_max_temp.append(cell)
  else:
    # Grid indices -> coordinates: 1 degree steps starting at 7.5 / 67.5.
    lat = str(7.5 + cell[0])
    lon = str(67.5 + cell[1])
    max_temp_dfs[f"{lat}-{lon}"] = pd.DataFrame(max_temp_values[:, cell[0], cell[1]], columns=['max_temp'])

In [None]:
# 'tmin' variable of the minimum-temperature dataset, plus fresh containers:
# a list for the skipped (row, col) cells and a dict of per-cell DataFrames
# keyed by "lat-lon".
min_temp = ds_tmin.get_xarray()['tmin']
skipped_min_temp, min_temp_dfs = [], {}

In [None]:
# Start from empty containers so that re-running this cell does not append
# duplicate entries to skipped_min_temp or keep stale frames in min_temp_dfs.
skipped_min_temp.clear()
min_temp_dfs.clear()

# Pull the values out once as a plain array; slicing it per cell avoids
# going through xarray indexing for every one of the 31 x 31 grid points.
min_temp_values = min_temp.data

# Missing days per cell, counted along the first (time) axis. Readings of
# 99.9 or above are what this notebook treats as missing; NaN is counted as
# well in case the sentinel has already been masked before it reaches here.
min_temp_missing_days = ((min_temp_values >= 99.9) | np.isnan(min_temp_values)).sum(axis=0)

for cell in itertools.product(np.arange(0, 31), np.arange(0, 31)):
  if min_temp_missing_days[cell] >= 180:
    # 180 or more days without data: leave this cell out.
    skipped_min_temp.append(cell)
  else:
    # Grid indices -> coordinates: 1 degree steps starting at 7.5 / 67.5.
    lat = str(7.5 + cell[0])
    lon = str(67.5 + cell[1])
    min_temp_dfs[f"{lat}-{lon}"] = pd.DataFrame(min_temp_values[:, cell[0], cell[1]], columns=['min_temp'])

In [None]:
# "lat-lon" key -> DataFrame holding both max_temp and min_temp for that cell.
temperature_dfs = dict()

In [None]:
# Join the min and max temperature frames of every cell that has both.
#
# Iterate max_temp_dfs in insertion order instead of over the set
# `max_temp_dfs.keys() & min_temp_dfs.keys()`: the iteration order of a set of
# strings changes between interpreter runs (hash randomisation), and the order
# of temperature_dfs decides which temperature cell wins when a rainfall cell
# is equally far from several of them in the nearest-cell lookup.
temperature_dfs.clear()  # drop any ordering left over from a previous run of this cell
for key in max_temp_dfs:
  if key in min_temp_dfs:
    temperature_dfs[key] = max_temp_dfs[key].join(min_temp_dfs[key])

In [None]:
def closest(lst, K):
    """Return the element of ``lst`` nearest to ``K`` by Euclidean distance.

    When several elements are equally near, the one that comes first in
    ``lst`` is returned. An empty ``lst`` raises ``ValueError``.
    """
    def gap(candidate):
        return distance.euclidean(candidate, K)

    return min(lst, key=gap)

In [None]:
# (lat, lon) float pairs parsed back out of the "lat-lon" keys of
# temperature_dfs, in the same order as the dict.
temp_coords = [
    (float(key.split('-')[0]), float(key.split('-')[1]))
    for key in temperature_dfs
]

In [None]:
# Pair every rainfall cell with its nearest temperature cell. The result is
# keyed "<rain lat>-<rain lon>_<temp lat>-<temp lon>" and holds the rainfall
# column joined with that temperature cell's columns.
climate_dfs = {}
for rain_key, rain_frame in rain_dfs.items():
    rain_lat, rain_lon = (float(part) for part in rain_key.split('-')[:2])
    nearest_lat, nearest_lon = closest(temp_coords, (rain_lat, rain_lon))
    temp_key = f"{nearest_lat}-{nearest_lon}"
    climate_dfs[f"{rain_key}_{temp_key}"] = rain_frame.join(temperature_dfs[temp_key])

In [None]:
def to_fwf(df, fname, name):
    """Write ``df`` to ``fname`` as a plain, whitespace-aligned text table.

    The first line of the file is ``name``. The rows of ``df`` follow, rendered
    by ``tabulate`` in "plain" format with numbers left-aligned; neither the
    index nor the column names are written.

    Args:
        df: DataFrame whose values are written row by row.
        fname: Path of the file to create (an existing file is overwritten).
        name: Text placed on the first line of the file.
    """
    content = tabulate(df.values.tolist(), tablefmt="plain", showindex='never', numalign='left')
    # The with-block closes the file even if one of the writes raises.
    with open(fname, "w") as f:
        f.write(name+'\n')
        f.write(content)

In [None]:
# Write one text file per paired cell: a 1-based row counter followed by
# max_temp, min_temp and rainfall, with the cell key as the first line.
for key, df in climate_dfs.items():
  # Size the counter from the frame itself; a fixed np.arange(1, 366) fails
  # with a length mismatch for any frame that is not exactly 365 rows long
  # (for example a leap year).
  df['index1'] = np.arange(1, len(df) + 1)
  ordered = df[['index1', 'max_temp', 'min_temp', 'rainfall']]
  to_fwf(ordered, f'/content/drive/MyDrive/GHG Emissions Paddy Fields Project/Climate Files/{key}.txt', key)