# Clean Up Weather Data Downloaded from GEE

Date: 09/01/2022

Note: Please install our open-source package, py4openag, 
by running `!pip install py4openag` before importing it. 
This package aims to establish the ML4Ops pipeline from 
public databases to integrated analytics for agriculture. 
It provides a variety of functions to study climate trends 
and simplify the calculation of commonly used metrics in 
agriculture, such as growing degree days, extreme heat degree 
days, the base temperature for different crop types, as well 
as basic climate metrics like average temperature and total 
precipitation for user-specified time periods. The package 
also includes unsupervised and supervised learning based on 
these metrics.

In [None]:
import os
import pandas as pd
import numpy as np
from datetime import datetime as dt
import py4openag
from py4openag import functions

In [None]:
# Build one summary record per (year, state, county) from the yearly
# weather CSVs exported from GEE.
mylist = []
threshold_gdd = 281.15  # 8 degrees Celsius expressed in kelvin
threshold_edd = 303.15  # 30 degrees Celsius expressed in kelvin

# Columns present in the export that are not needed for the metrics below.
unused_columns = ['system:index', 'COUNTYNS', 'AFFGEOID',
                  'ALAND', 'AWATER', 'GEOID',
                  'LSAD', 'dewpoint_2m_temperature',
                  'mean_sea_level_pressure',
                  'surface_pressure', 'u_component_of_wind_10m',
                  'v_component_of_wind_10m', '.geo']

# Parent of the working directory; it does not change between years, so it
# is computed once. The name `cwd` is kept because the export cell reuses it.
cwd = os.path.dirname(os.getcwd())

for y in range(2001, 2021):
    print(y)
    # Join path components individually so the separator is correct on
    # every operating system (a literal backslash only works on Windows).
    location = os.path.join(cwd, 'Data_original',
                            'weather' + str(y) + '_corn.csv')
    df = pd.read_csv(location, header=0)
    df.drop(columns=unused_columns, inplace=True)
    # Keep only rows with at least 5 non-missing values and a known date.
    df.dropna(thresh=5, inplace=True)
    df.dropna(subset=['Date'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # 'Date' holds epoch milliseconds; convert to naive UTC timestamps.
    # pd.to_datetime replaces datetime.utcfromtimestamp, which is
    # deprecated since Python 3.12.
    df['Date'] = pd.to_datetime(df['Date'], unit='ms')

    # A distinct name for each county frame avoids shadowing the yearly `df`.
    for (statefp, countyfp), county_df in df.groupby(['STATEFP', 'COUNTYFP']):
        agfunctions = functions(county_df)
        # NOTE(review): the year/threshold argument order differs between
        # growing_degree_days and extreme_degree_days, and [1, 12] is
        # presumably a month range -- confirm against the py4openag API.
        mylist.append({
            'year': y,
            'countyfp': countyfp,
            'statefp': statefp,
            'prcp': agfunctions.total_precipitation(county_df, y),
            'gdd': agfunctions.growing_degree_days(county_df,
                                                   y, threshold_gdd),
            'edd': agfunctions.extreme_degree_days(county_df,
                                                   threshold_edd,
                                                   y, [1, 12]),
        })

In [None]:
# Collect the per-county records into one table and save it next to the
# input files.
output_df = pd.DataFrame(mylist)
# Join path components individually so the separator is correct on every
# operating system (a literal backslash only works on Windows).
location = os.path.join(cwd, 'Data_original', 'corn_weather.csv')
output_df.to_csv(location)

In [None]:
# Pairwise Pearson correlation between the columns of the summary table.
output_df.corr(method='pearson')

In [None]:
from scipy.stats import pearsonr
import numpy as np
# Pearson correlation coefficients between the columns of output_df.
rho = output_df.corr()
# p-value of each pairwise correlation. With a callable method pandas puts
# 1 on the diagonal, so subtracting the identity matrix sets it to 0.
pval = output_df.corr(method=lambda x, y: pearsonr(x, y)[1]) - np.eye(*rho.shape)


def _stars(p_value):
    # One '*' for each significance level (.05, .01, .001) the p-value meets.
    return ''.join('*' for t in (.05, .01, .001) if p_value <= t)


# Column-wise Series.map replaces DataFrame.applymap, which is deprecated
# since pandas 2.1; this form works on both older and newer versions.
p = pval.apply(lambda column: column.map(_stars))
# Rounded coefficients with the significance stars appended.
rho.round(2).astype(str) + p