# Yearly average classification table

This notebook combines the model predictions and the grid point categories to produce a table of the improvements when going from

* V15 ---> V20
* V15 ---> V20X

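This improvement is denoted by `delta`, i.e. `delta = V20_prediction_error - V15_prediction_error` (and analogously `deltaX = V20X_prediction_error - V15_prediction_error`), so a negative value means the prediction error decreased relative to V15.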

#### Load all the data


In [4]:
import pandas as pd
# Yearly-averaged prediction tables, one per model version.
# NOTE(review): pickles are presumably written by earlier notebooks in this
# project -- only unpickle files from a trusted source.
V15, V20, V20X = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        'tmp_data/V15_predictions_yearly_average.pkl',
        'tmp_data/V20_predictions_yearly_average.pkl',
        'tmp_data/V20X_predictions_yearly_average.pkl',
    )
)

# Per-grid-point categories; joined onto the predictions further down.
change_in_fields = pd.read_pickle('tmp_data/change_in_fields.pkl')

In [5]:
# Peek at one of the prediction tables to check its columns
V20

Unnamed: 0,latitude_ERA,longitude_ERA,MODIS_LST,predictions,predicion_bias,predicion_error
0,-70.117048,-75.75,256.971771,258.764496,-1.792742,4.774697
1,-70.117048,-75.00,256.868408,257.034821,-0.166410,4.524141
2,-70.117048,-72.00,255.929306,257.770081,-1.840781,5.922895
3,-70.117048,-71.25,254.781509,257.310944,-2.529431,5.754484
4,-70.117048,-70.50,255.270538,258.086243,-2.815704,5.888017
...,...,...,...,...,...,...
149805,70.117048,168.00,264.279388,264.485321,-0.205930,3.708954
149806,70.117048,168.75,263.243958,263.287903,-0.043926,3.540926
149807,70.117048,170.25,266.316162,262.690735,3.625420,5.818576
149808,70.117048,171.00,266.383087,264.261658,2.121440,5.332411


#### Join it together

In [6]:
# Gather predictions and errors of the three model versions in one frame.
# Coordinates and the MODIS_LST column are taken from V20.
# NOTE(review): the columns are combined on the row index, so this assumes
# V15, V20 and V20X list the same grid points in the same order -- TODO confirm.
data = {
    'latitude_ERA': V20['latitude_ERA'],
    'longitude_ERA': V20['longitude_ERA'],
    'MODIS_LST': V20['MODIS_LST'],
}

# Raw predictions of each model version
data.update({
    'V15_prediction': V15['predictions'],
    'V20_prediction': V20['predictions'],
    'V20X_prediction': V20X['predictions'],
})

# Prediction errors, plus their change relative to V15
# (negative delta/deltaX = lower error than V15)
data.update({
    'V15_error': V15['predicion_error'],
    'V20_error': V20['predicion_error'],
    'V20X_error': V20X['predicion_error'],
    'delta': V20['predicion_error'] - V15['predicion_error'],
    'deltaX': V20X['predicion_error'] - V15['predicion_error'],
})

df = pd.DataFrame(data)

In [7]:
# Attach the grid point categories to the error table. The inner join keeps
# only grid points whose (latitude, longitude) pair appears in both frames.
df_cat = df.merge(
    change_in_fields,
    how='inner',
    left_on=['latitude_ERA', 'longitude_ERA'],
    right_on=['latitude', 'longitude'],
)

In [17]:
# Persist the joined table (errors, deltas and categories) to disk as a pickle
df_cat.to_pickle('tmp_data/clean_yearly_data.pkl')

#### Create a classification table

In [13]:



from scipy.stats import ttest_ind
import numpy as np

def significance_test(v1,v2):
    """Run a two-sample t-test (`scipy.stats.ttest_ind`) on `v1` and `v2`.

    Parameters
    ----------
    v1, v2 : array-like
        The two samples to compare.

    Returns
    -------
    The `ttest_ind` result (t-statistic first, p-value last), or `np.nan`
    if the test raised an error.

    NOTE(review): the notebook calls this with two error columns of the same
    rows; if those are measurements at the same grid points, a paired test
    may be more appropriate -- confirm.
    """
    try:
        return ttest_ind(v1, v2)
    except Exception:
        # Best effort: a group that cannot be tested is reported as NaN.
        # Catch `Exception` rather than everything so that KeyboardInterrupt
        # and SystemExit still stop a long-running cell.
        return np.nan

def significance_boolean(x, alpha=0.01):
    """Label a t-test result as 'Significant' or 'Insignificant'.

    Parameters
    ----------
    x : sequence or float
        A test result whose last element is the p-value, or a
        non-subscriptable placeholder such as `np.nan`.
    alpha : float, optional
        Significance threshold. A p-value greater than `alpha` (or a NaN
        p-value) is labelled 'Insignificant'. Defaults to 0.01.

    Returns
    -------
    str
        'Significant' or 'Insignificant'.
    """
    try:
        pval = x[-1]
    except (TypeError, IndexError, KeyError):
        # NaN placeholders are floats and not subscriptable; an empty result
        # has no last element. Either way there is no p-value to judge.
        return 'Insignificant'

    if (pval > alpha) or np.isnan(pval):
        return 'Insignificant'
    return 'Significant'


def create_classification_table(ds,table_type):
    """Summarise the change in prediction error per `bitstring` category.

    Parameters
    ----------
    ds : pandas.DataFrame
        Must contain the columns 'bitstring', 'V15_error' and, depending on
        `table_type`, either 'delta' and 'V20_error' or 'deltaX' and
        'V20X_error'.
    table_type : {'V20', 'V20X'}
        Which model version to compare against V15.

    Returns
    -------
    pandas.DataFrame
        One row per bitstring with the number of pixels, their percentage of
        the total, the mean delta, the t-test result between the V15 errors
        and the selected model's errors, and a 'Significant'/'Insignificant'
        label. Rows are sorted by label, then pixel count, both descending.

    Raises
    ------
    ValueError
        If `table_type` is not 'V20' or 'V20X'.
    """
    # (delta column, error column) for each supported comparison
    columns_by_type = {
        'V20':  ('delta',  'V20_error'),
        'V20X': ('deltaX', 'V20X_error'),
    }
    if table_type not in columns_by_type:
        # Previously an unknown type fell through and failed later with an
        # UnboundLocalError; fail early with a readable message instead.
        raise ValueError(
            f"Unknown table_type {table_type!r}; expected one of {sorted(columns_by_type)}"
        )
    q, x1 = columns_by_type[table_type]

    grouped = ds.groupby('bitstring')

    # For each group, count number of pixels and get the average delta
    classification_table = grouped.agg(**{'Number of Pixels': pd.NamedAgg(q, 'size'),
                                          'AverageDelta':     pd.NamedAgg(q, 'mean')})

    # Express number of pixels as a percentage of all pixels
    pixel_counts = classification_table['Number of Pixels']
    classification_table["Percentage"] = 100.0 * pixel_counts / pixel_counts.sum()

    # For each group, do a t-test between the V15 errors and the selected
    # model's errors. Only the two error columns are handed to `apply`, so the
    # grouping column is never passed to the callback.
    classification_table['Stats (t-statistic,p-value)'] = (
        grouped[['V15_error', x1]].apply(lambda g: significance_test(g['V15_error'], g[x1]))
    )
    classification_table['Significant Change?'] = (
        classification_table['Stats (t-statistic,p-value)'].apply(significance_boolean)
    )

    ordered_columns = ['Number of Pixels', 'Percentage', 'AverageDelta',
                       'Stats (t-statistic,p-value)', 'Significant Change?']
    return classification_table[ordered_columns].sort_values(
        by=['Significant Change?', 'Number of Pixels'], ascending=False
    )







In [14]:
# Build one classification table per comparison: V15 -> V20 and V15 -> V20X
table_v20 = create_classification_table(df_cat,'V20')
table_v20X = create_classification_table(df_cat,'V20X')

In [15]:
# Classification table for the V15 -> V20 comparison
table_v20

Unnamed: 0_level_0,Number of Pixels,Percentage,AverageDelta,"Stats (t-statistic,p-value)",Significant Change?
bitstring,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
100,135005,90.117482,-0.020074,"(5.6015351428002385, 2.1266764789306387e-08)",Significant
0,9119,6.087044,-0.051633,"(3.025451326903979, 0.0024860820906935925)",Significant
100110,1512,1.009278,-0.33601,"(6.428846519940273, 1.4890045307006747e-10)",Significant
101,834,0.556705,-0.146436,"(2.603472646240921, 0.00931042147086133)",Significant
11100,18,0.012015,3.905514,"(-4.470645229389644, 8.249553308371896e-05)",Significant
10100,13,0.008678,7.586944,"(-13.562981643142237, 9.521420939065104e-13)",Significant
101100,8,0.00534,0.486319,"(-3.3043994483637684, 0.00521642231127351)",Significant
100100,2392,1.596689,-0.053458,"(2.174490351294765, 0.02971719330884196)",Insignificant
110,247,0.164876,-0.151713,"(1.5120205599473904, 0.1311706602165086)",Insignificant
100000,234,0.156198,-0.19249,"(1.996783817109343, 0.04642985343989715)",Insignificant


In [16]:
# Classification table for the V15 -> V20X comparison
table_v20X

Unnamed: 0_level_0,Number of Pixels,Percentage,AverageDelta,"Stats (t-statistic,p-value)",Significant Change?
bitstring,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
100,135005,90.117482,-0.081465,"(23.56030193871013, 1.3102858471844417e-122)",Significant
0,9119,6.087044,-0.104934,"(6.350991251617155, 2.189890540772654e-10)",Significant
100100,2392,1.596689,-0.111606,"(4.718440796562972, 2.4443631137646333e-06)",Significant
100110,1512,1.009278,-0.395243,"(7.593134806378133, 4.135401079128569e-14)",Significant
101,834,0.556705,-0.284028,"(4.890430864789042, 1.1031513823102672e-06)",Significant
100000,234,0.156198,-0.270278,"(2.896794175583813, 0.003947403841422364)",Significant
110,247,0.164876,-0.194843,"(2.0249189795464706, 0.043415219271146346)",Insignificant
111,114,0.076096,-0.082715,"(0.6609266848660991, 0.5093324991958698)",Insignificant
100010,104,0.069421,-0.19276,"(1.7034173947827063, 0.0899980578333079)",Insignificant
1,43,0.028703,-0.247474,"(1.00072653938051, 0.3198331285204762)",Insignificant


---