In [2]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
df = pd.read_csv("hourly-weather-surface-brazil-southeast-region/sudeste.csv",
        header=0,
        nrows=10000
       )
cols = ['prcp','stp','smax','smin','gbrd','temp','dewp','tmax',
        'dmax','tmin','dmin','hmdy','hmax','hmin','wdsp','wdct','gust']
cols2 = ['stp','smax','smin','temp','dewp','tmax',
        'dmax','tmin','dmin','hmdy','hmax','hmin','wdsp','wdct','gust']
categorial = ['wsid','wsnm','elvt','lat','lon','inme','city','prov','yr','mo','da','hr']
categorial2 = ['yr','mo','da','hr']

df[categorial] = df[categorial].astype('category')

def del_notunique_cols(df):
    for col in categorial:
        if(len(df[col].unique())==1):
            df = df.drop(labels = [col],axis=1)
    return df

def dropna_rows(df,cols):
    for i in range(len(df.index)):
        if (all(np.isnan(df.loc[i][cols]))):
            df = df.drop(i,axis=0)   
    return df
    
df = df.drop(labels=["mdct","date"], axis=1)
df = del_notunique_cols(df)
df = pd.get_dummies(df, columns=categorial2)

def del_empty_rows(df,cols):
    df[cols] = df[cols].replace(0,np.nan)
    
    #My written function works not quickly
    #df = dropna_rows(df,cols)
    df = df.dropna(how="all",subset=cols)
    return df

def zeros_fillna(df,cols):
    #My written function (works not quickly)
    #for col in cols:
    #    for i in range(len(df[col])): 
    #        if (np.isnan(df[col].iloc[i])==True):
    #            df[col].iloc[i] = 0
    df[cols] = df[cols].fillna(0)
    return df

df = del_empty_rows(df,cols)
df = zeros_fillna(df,['prcp','gbrd'])

df_min = df.min()
df_max = df.max()
df_mean = df.mean()
df_std = df.std()

headers = list(df)
 
def dropna_cols(df):
    for col in list(df):
        if (any(np.isnan(df[col]))):
            df = df.drop(labels = [col],axis=1)
    return df

def mean_fillna(df,cols): 
    for col in cols:
        for i in range(len(df[col])): 
            if (np.isnan(df[col].iloc[i])==True):
                df[col].iloc[i] = df[col].mean()
    #df[cols] = df[cols].fillna(df[cols].mean())
    return df

def median_fillna(df,cols):
    #My written function (works not quickly)
    for col in cols:
        for i in range(len(df[col])): 
            if (np.isnan(df[col].iloc[i])==True):
                df[col].iloc[i] = df[col].median()
    #df[cols] = df[cols].fillna(df[cols].median())
    return df

def mode_fillna(df,cols):
    #My written function (works not quickly)
    for col in cols:
        for i in range(len(df[col])): 
            if (np.isnan(df[col].iloc[i])==True):
                df[col].iloc[i] = df[col].mode().iloc[0]
    #df[cols] = df[cols].fillna(df[cols].mode().iloc[0])
    return df

def standartize_minmax(df):
    df[headers[:17]] = (df[headers[:17]]-df_min[headers[:17]])/(df_max[headers[:17]]-df_min[headers[:17]])
    return df

def unstandartize_minmax(df):
    df[headers[:17]] = df[headers[:17]]*(df_max[headers[:17]]-df_min[headers[:17]])+df_min[headers[:17]]
    return df

def standartize_mean_std(df):
    df[headers[:17]] = (df[headers[:17]]-df_mean[headers[:17]])/df_std[headers[:17]]
    return df

def unstandartize_mean_std(df):
    df[headers[:17]] = df[headers[:17]]*df_std[headers[:17]]+df_mean[headers[:17]]
    return df

def KNN_fillna(df,use_date_features = False):
    headers = list(df)
    
    for target in headers[:17]:
        if (use_date_features == False):
            df0 = df[headers[:-69]]
        X = dropna_cols(df0.loc[:, df0.columns != target])
        Y = df0.loc[:, df0.columns == target]
        x_test = X.loc[df0[target].isna()]
        x_train = X.loc[df0[target].notna()]
        y_test = Y.loc[df0[target].isna()]
        y_train = Y.loc[df0[target].notna()]
        
        for row_num in y_test.index.tolist():
            row = x_test.loc[row_num]
            diff = x_train.sub(row.squeeze(),axis=1)
            distances = np.sqrt(np.square(diff).sum(axis=1))
            distances_sorted = distances.sort_values()
            k7_indexes = list(distances_sorted[:7].index)
            knn_values = y_train.loc[k7_indexes][target]
            k7_mean = knn_values.mean()

            print(target,"(",row_num,"):",round(k7_mean, 4))

            df[target][row_num] = round(k7_mean, 4)
    return df

def LR_fillna(df, use_date_features = True):
    headers = list(df)
    
    lr = LinearRegression()
    
    for target in headers[:17]:
        features = [item for item in headers if item != target]
        if (use_date_features == False):
            features = features[:-69]
        X = dropna_cols(df[features])
        X_train = X.loc[df[target].notna()]
        notna_features = list(X_train)
        y_train = df[target].loc[df[target].notna()]
        X_test = X[notna_features].loc[df[target].isna()]
        y_test = df[target].loc[df[target].isna()]
        if(len(y_test)!=0):
            lr = lr.fit(X_train, y_train)

            y_predicted = np.array(lr.predict(X_test))
            df[target].loc[df[target].isna()] = y_predicted
        else:
            continue
    return df


def checkPrediction(df):
    target = 'temp'
    features = [item for item in headers if item != target]
    X_train, X_test, y_train, y_test = train_test_split(df[features], df[target].astype('float64'), test_size=0.2)
    lr = LinearRegression()
    lr = lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    print(mean_squared_error(y_test, y_pred))

In [28]:
df_mm = standartize_minmax(df)

df_mm_knn = KNN_fillna(df_mm) 
df_mm_knn = unstandartize_minmax(df_mm_knn)

checkPrediction(df_mm_knn)

stp ( 8712 ): 0.495
smax ( 538 ): 0.497
smax ( 682 ): 0.5905
smax ( 802 ): 0.6318


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


smax ( 826 ): 0.5765
smax ( 849 ): 0.5584
smax ( 922 ): 0.5825
smax ( 994 ): 0.5895
smax ( 1018 ): 0.6308
smax ( 1042 ): 0.6137
smax ( 1066 ): 0.5483
smax ( 1090 ): 0.6217
smax ( 1186 ): 0.6056
smax ( 1210 ): 0.5795
smax ( 1234 ): 0.502
smax ( 1258 ): 0.5604
smax ( 1282 ): 0.6107
smax ( 1330 ): 0.5875
smax ( 1378 ): 0.664
smax ( 1402 ): 0.7113
smax ( 1426 ): 0.7384
smax ( 1450 ): 0.7485
smax ( 1474 ): 0.6338
smax ( 1498 ): 0.6177
smax ( 1522 ): 0.5342
smax ( 1546 ): 0.5221
smax ( 1570 ): 0.5644
smax ( 1594 ): 0.502
smax ( 1618 ): 0.5926
smax ( 1642 ): 0.4809
smax ( 1666 ): 0.5131
smax ( 1690 ): 0.5272
smax ( 1714 ): 0.4799
smax ( 1762 ): 0.6539
smax ( 1786 ): 0.5996
smax ( 2002 ): 0.5845
smax ( 2026 ): 0.4839
smax ( 2050 ): 0.3521
smax ( 2073 ): 0.2324
smax ( 2076 ): 0.3501
smax ( 2122 ): 0.5755
smax ( 2146 ): 0.7072
smax ( 2170 ): 0.666
smax ( 2194 ): 0.7334
smax ( 2218 ): 0.7465
smax ( 2242 ): 0.7706
smax ( 2266 ): 0.7696
smax ( 2314 ): 0.5815
smax ( 2338 ): 0.5865
smax ( 2362 ): 0.5

tmin ( 1618 ): 0.3718
tmin ( 1642 ): 0.5747
tmin ( 1666 ): 0.5297
tmin ( 1690 ): 0.5101
tmin ( 1714 ): 0.3816
tmin ( 1762 ): 0.4892
tmin ( 1786 ): 0.4892
tmin ( 2002 ): 0.4077
tmin ( 2026 ): 0.4984
tmin ( 2050 ): 0.5153
tmin ( 2073 ): 0.5427
tmin ( 2076 ): 0.5695
tmin ( 2122 ): 0.3888
tmin ( 2146 ): 0.4403
tmin ( 2170 ): 0.5108
tmin ( 2194 ): 0.4214
tmin ( 2218 ): 0.4214
tmin ( 2242 ): 0.4025
tmin ( 2266 ): 0.4012
tmin ( 2314 ): 0.4586
tmin ( 2338 ): 0.5114
tmin ( 2362 ): 0.3738
tmin ( 2377 ): 0.5134
tmin ( 2386 ): 0.4488
tmin ( 2410 ): 0.5584
tmin ( 2675 ): 0.4892
tmin ( 2794 ): 0.4083
tmin ( 2795 ): 0.4788
tmin ( 4281 ): 0.4123
tmin ( 4294 ): 0.4488
tmin ( 4305 ): 0.3614
tmin ( 4329 ): 0.3725
tmin ( 4353 ): 0.3979
tmin ( 4396 ): 0.3823
tmin ( 4399 ): 0.3627
tmin ( 4421 ): 0.3653
tmin ( 4422 ): 0.3699
tmin ( 4423 ): 0.3888
tmin ( 4437 ): 0.5121
tmin ( 4445 ): 0.422
tmin ( 4448 ): 0.379
tmin ( 4449 ): 0.4103
tmin ( 4489 ): 0.4351
tmin ( 4545 ): 0.3777
tmin ( 4569 ): 0.2159
tmin ( 4593 

wdsp ( 1821 ): 0.0814
wdsp ( 1822 ): 0.1246
wdsp ( 1842 ): 0.113
wdsp ( 1843 ): 0.0797
wdsp ( 1844 ): 0.1096
wdsp ( 1859 ): 0.1146
wdsp ( 1916 ): 0.0664
wdsp ( 1918 ): 0.0548
wdsp ( 2002 ): 0.0681
wdsp ( 2037 ): 0.0748
wdsp ( 2038 ): 0.1146
wdsp ( 2133 ): 0.0299
wdsp ( 2134 ): 0.1379
wdsp ( 2157 ): 0.1113
wdsp ( 2159 ): 0.1561
wdsp ( 2180 ): 0.1047
wdsp ( 2207 ): 0.1445
wdsp ( 2228 ): 0.1379
wdsp ( 2229 ): 0.1196
wdsp ( 2230 ): 0.0997
wdsp ( 2277 ): 0.0648
wdsp ( 2278 ): 0.113
wdsp ( 2297 ): 0.1827
wdsp ( 2301 ): 0.0847
wdsp ( 2302 ): 0.1312
wdsp ( 2303 ): 0.0997
wdsp ( 2324 ): 0.0698
wdsp ( 2325 ): 0.0615
wdsp ( 2326 ): 0.108
wdsp ( 2327 ): 0.1877
wdsp ( 2348 ): 0.088
wdsp ( 2349 ): 0.0897
wdsp ( 2350 ): 0.0714
wdsp ( 2362 ): 0.0781
wdsp ( 2363 ): 0.0714
wdsp ( 2372 ): 0.0781
wdsp ( 2373 ): 0.0864
wdsp ( 2374 ): 0.1096
wdsp ( 2386 ): 0.1528
wdsp ( 2422 ): 0.2043
wdsp ( 2446 ): 0.1262
wdsp ( 2459 ): 0.0698
wdsp ( 2483 ): 0.0814
wdsp ( 2506 ): 0.0864
wdsp ( 2516 ): 0.103
wdsp ( 2518 ): 

wdsp ( 4161 ): 0.0847
wdsp ( 4162 ): 0.0548
wdsp ( 4175 ): 0.1179
wdsp ( 4176 ): 0.0797
wdsp ( 4185 ): 0.0133
wdsp ( 4197 ): 0.093
wdsp ( 4198 ): 0.1063
wdsp ( 4203 ): 0.0748
wdsp ( 4206 ): 0.0797
wdsp ( 4207 ): 0.0648
wdsp ( 4208 ): 0.0432
wdsp ( 4209 ): 0.0748
wdsp ( 4210 ): 0.0847
wdsp ( 4232 ): 0.0332
wdsp ( 4244 ): 0.1013
wdsp ( 4245 ): 0.0781
wdsp ( 4246 ): 0.0532
wdsp ( 4247 ): 0.0714
wdsp ( 4248 ): 0.0548
wdsp ( 4249 ): 0.0615
wdsp ( 4253 ): 0.0615
wdsp ( 4254 ): 0.0897
wdsp ( 4255 ): 0.0681
wdsp ( 4256 ): 0.0748
wdsp ( 4257 ): 0.0648
wdsp ( 4281 ): 0.0515
wdsp ( 4294 ): 0.0847
wdsp ( 4295 ): 0.0581
wdsp ( 4296 ): 0.0598
wdsp ( 4305 ): 0.0166
wdsp ( 4329 ): 0.0615
wdsp ( 4330 ): 0.1096
wdsp ( 4353 ): 0.0365
wdsp ( 4378 ): 0.0648
wdsp ( 4396 ): 0.0233
wdsp ( 4399 ): 0.0498
wdsp ( 4400 ): 0.0399
wdsp ( 4401 ): 0.0432
wdsp ( 4402 ): 0.0449
wdsp ( 4417 ): 0.0831
wdsp ( 4419 ): 0.0299
wdsp ( 4421 ): 0.0482
wdsp ( 4422 ): 0.0432
wdsp ( 4425 ): 0.0216
wdsp ( 4426 ): 0.0449
wdsp ( 4437

wdsp ( 6528 ): 0.1047
wdsp ( 6530 ): 0.0714
wdsp ( 6533 ): 0.0814
wdsp ( 6534 ): 0.0648
wdsp ( 6535 ): 0.0299
wdsp ( 6536 ): 0.0432
wdsp ( 6537 ): 0.0548
wdsp ( 6553 ): 0.098
wdsp ( 6558 ): 0.0399
wdsp ( 6624 ): 0.0565
wdsp ( 6631 ): 0.0664
wdsp ( 6632 ): 0.0698
wdsp ( 6644 ): 0.1927
wdsp ( 6646 ): 0.1711
wdsp ( 6696 ): 0.1312
wdsp ( 6705 ): 0.0365
wdsp ( 6717 ): 0.0698
wdsp ( 6752 ): 0.0615
wdsp ( 6753 ): 0.0532
wdsp ( 6824 ): 0.0714
wdsp ( 6825 ): 0.0532
wdsp ( 6826 ): 0.0963
wdsp ( 6845 ): 0.1512
wdsp ( 6846 ): 0.1478
wdsp ( 6919 ): 0.1462
wdsp ( 6920 ): 0.2143
wdsp ( 7061 ): 0.1777
wdsp ( 7062 ): 0.1395
wdsp ( 7063 ): 0.2027
wdsp ( 7064 ): 0.1379
wdsp ( 7065 ): 0.0914
wdsp ( 7066 ): 0.113
wdsp ( 7103 ): 0.186
wdsp ( 7113 ): 0.1578
wdsp ( 7126 ): 0.1811
wdsp ( 7149 ): 0.1528
wdsp ( 7174 ): 0.1711
wdsp ( 7175 ): 0.2027
wdsp ( 7257 ): 0.093
wdsp ( 7258 ): 0.1512
wdsp ( 7295 ): 0.2492
wdsp ( 7297 ): 0.1196
wdsp ( 7298 ): 0.1645
wdsp ( 7299 ): 0.1478
wdsp ( 7319 ): 0.1927
wdsp ( 7328 ):

gust ( 4445 ): 0.0886
gust ( 4448 ): 0.0536
gust ( 4449 ): 0.0602
gust ( 4489 ): 0.1819
gust ( 4496 ): 0.166
gust ( 4509 ): 0.1184
gust ( 4545 ): 0.1892
gust ( 4569 ): 0.0668
gust ( 4593 ): 0.1475
gust ( 4616 ): 0.0794
gust ( 4617 ): 0.0754
gust ( 4629 ): 0.1898
gust ( 4632 ): 0.0886
gust ( 4635 ): 0.1005
gust ( 4641 ): 0.0747
gust ( 4654 ): 0.1409
gust ( 4657 ): 0.256
gust ( 4665 ): 0.0681
gust ( 4678 ): 0.0688
gust ( 4679 ): 0.1012
gust ( 4686 ): 0.0794
gust ( 4703 ): 0.121
gust ( 4713 ): 0.1019
gust ( 4725 ): 0.084
gust ( 4726 ): 0.0655
gust ( 4734 ): 0.0807
gust ( 4736 ): 0.0582
gust ( 4738 ): 0.0734
gust ( 4739 ): 0.1098
gust ( 4750 ): 0.1257
gust ( 4755 ): 0.0873
gust ( 4758 ): 0.1104
gust ( 4759 ): 0.0741
gust ( 4760 ): 0.078
gust ( 4798 ): 0.0668
gust ( 4853 ): 0.0542
gust ( 4857 ): 0.1164
gust ( 4870 ): 0.0853
gust ( 4881 ): 0.0648
gust ( 4903 ): 0.0701
gust ( 4976 ): 0.0681
gust ( 5070 ): 0.0701
gust ( 5071 ): 0.0866
gust ( 5072 ): 0.0966
gust ( 5073 ): 0.1303
gust ( 5092 ): 

In [32]:
#refresh data on second page

df_norm = standartize_mean_std(df)

df_norm_knn = KNN_fillna(df_norm) 
df_norm_knn = unstandartize_mean_std(df_norm_knn)

checkPrediction(df_norm_knn)

stp ( 8712 ): -0.3437
smax ( 538 ): -0.4337
smax ( 682 ): 0.1385
smax ( 802 ): 0.3445


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


smax ( 826 ): 0.0641
smax ( 849 ): -0.1019
smax ( 922 ): 0.0584
smax ( 994 ): 0.0984
smax ( 1018 ): 0.3731
smax ( 1042 ): 0.23
smax ( 1066 ): -0.1019
smax ( 1090 ): 0.3044
smax ( 1186 ): 0.2815
smax ( 1210 ): 0.0183
smax ( 1234 ): -0.3765
smax ( 1258 ): -0.0675
smax ( 1282 ): 0.293
smax ( 1330 ): 0.1728
smax ( 1378 ): 0.5047
smax ( 1402 ): 0.8823
smax ( 1426 ): 1.0139
smax ( 1450 ): 1.0082
smax ( 1474 ): 0.4246
smax ( 1498 ): 0.3902
smax ( 1522 ): -0.1877
smax ( 1546 ): -0.222
smax ( 1570 ): -0.0217
smax ( 1594 ): -0.3422
smax ( 1618 ): 0.0927
smax ( 1642 ): -0.4452
smax ( 1666 ): -0.3136
smax ( 1690 ): -0.2735
smax ( 1714 ): -0.531
smax ( 1762 ): 0.4818
smax ( 1786 ): 0.1842
smax ( 2002 ): 0.0698
smax ( 2026 ): -0.4738
smax ( 2050 ): -1.2062
smax ( 2073 ): -1.9157
smax ( 2076 ): -1.0975
smax ( 2122 ): 0.0526
smax ( 2146 ): 0.8022
smax ( 2170 ): 0.5218
smax ( 2194 ): 0.9968
smax ( 2218 ): 1.054
smax ( 2242 ): 1.2085
smax ( 2266 ): 1.1398
smax ( 2314 ): 0.0526
smax ( 2338 ): 0.127
smax 

tmin ( 1378 ): -0.2903
tmin ( 1402 ): -0.0074
tmin ( 1426 ): -0.0467
tmin ( 1450 ): -0.1135
tmin ( 1474 ): -0.3924
tmin ( 1498 ): -0.2706
tmin ( 1522 ): 0.1929
tmin ( 1546 ): -0.4592
tmin ( 1570 ): -0.4788
tmin ( 1594 ): -0.2824
tmin ( 1618 ): -1.021
tmin ( 1642 ): 0.024
tmin ( 1666 ): -0.2863
tmin ( 1690 ): -0.2706
tmin ( 1714 ): -0.9738
tmin ( 1762 ): -0.3296
tmin ( 1786 ): -0.3806
tmin ( 2002 ): -0.8953
tmin ( 2026 ): -0.2706
tmin ( 2050 ): -0.2392
tmin ( 2073 ): -0.1488
tmin ( 2076 ): 0.0044
tmin ( 2122 ): -1.0367
tmin ( 2146 ): -0.7106
tmin ( 2170 ): -0.2274
tmin ( 2194 ): -0.8088
tmin ( 2218 ): -0.8049
tmin ( 2242 ): -0.5299
tmin ( 2266 ): -0.9778
tmin ( 2314 ): -0.9503
tmin ( 2338 ): -0.2156
tmin ( 2362 ): -1.0838
tmin ( 2377 ): -0.251
tmin ( 2386 ): -0.691
tmin ( 2410 ): -0.2981
tmin ( 2675 ): -0.5535
tmin ( 2794 ): -0.8403
tmin ( 2795 ): -0.2824
tmin ( 4281 ): -0.9188
tmin ( 4294 ): -0.691
tmin ( 4305 ): -1.2096
tmin ( 4329 ): -1.1703
tmin ( 4353 ): -0.9974
tmin ( 4396 ): -1.0

wdsp ( 1221 ): -1.0577
wdsp ( 1292 ): -1.0322
wdsp ( 1293 ): -1.0067
wdsp ( 1294 ): -0.6246
wdsp ( 1295 ): 0.1141
wdsp ( 1364 ): -0.943
wdsp ( 1462 ): -1.236
wdsp ( 1510 ): -0.8666
wdsp ( 1580 ): -0.2298
wdsp ( 1581 ): -1.2105
wdsp ( 1618 ): -1.4779
wdsp ( 1629 ): -1.3124
wdsp ( 1690 ): -0.3954
wdsp ( 1724 ): -1.1978
wdsp ( 1773 ): -1.0322
wdsp ( 1774 ): -1.0449
wdsp ( 1798 ): -0.8284
wdsp ( 1819 ): -0.9048
wdsp ( 1820 ): -1.2614
wdsp ( 1821 ): -0.9812
wdsp ( 1822 ): -0.4463
wdsp ( 1842 ): -0.6501
wdsp ( 1843 ): -1.4015
wdsp ( 1844 ): -0.7775
wdsp ( 1859 ): -0.1534
wdsp ( 1916 ): -1.1978
wdsp ( 1918 ): -1.2869
wdsp ( 2002 ): -1.2996
wdsp ( 2037 ): -1.1341
wdsp ( 2038 ): -0.9685
wdsp ( 2133 ): -1.4907
wdsp ( 2134 ): -1.0194
wdsp ( 2157 ): -0.4845
wdsp ( 2159 ): -0.3317
wdsp ( 2180 ): -0.4591
wdsp ( 2207 ): -0.2935
wdsp ( 2228 ): -0.9812
wdsp ( 2229 ): -0.6501
wdsp ( 2230 ): -0.943
wdsp ( 2277 ): -1.3378
wdsp ( 2278 ): -0.6883
wdsp ( 2297 ): -0.1534
wdsp ( 2301 ): -1.0194
wdsp ( 2302 ): 

wdsp ( 4058 ): -1.4779
wdsp ( 4060 ): -1.3251
wdsp ( 4063 ): -1.4779
wdsp ( 4064 ): -1.4397
wdsp ( 4065 ): -1.3633
wdsp ( 4066 ): -1.236
wdsp ( 4078 ): -1.3124
wdsp ( 4079 ): -1.5289
wdsp ( 4080 ): -1.4779
wdsp ( 4082 ): -1.3378
wdsp ( 4083 ): -1.4015
wdsp ( 4084 ): -1.3888
wdsp ( 4085 ): -1.2487
wdsp ( 4101 ): -1.2487
wdsp ( 4103 ): -1.0959
wdsp ( 4104 ): -1.3633
wdsp ( 4105 ): -1.4779
wdsp ( 4106 ): -1.3251
wdsp ( 4107 ): -1.3888
wdsp ( 4112 ): -1.2996
wdsp ( 4113 ): -1.3506
wdsp ( 4114 ): -0.7647
wdsp ( 4127 ): -0.9176
wdsp ( 4128 ): -0.994
wdsp ( 4130 ): -1.3251
wdsp ( 4132 ): -1.236
wdsp ( 4133 ): -1.2105
wdsp ( 4135 ): -1.3251
wdsp ( 4136 ): -1.4015
wdsp ( 4137 ): -1.4907
wdsp ( 4150 ): -1.1723
wdsp ( 4153 ): -1.4525
wdsp ( 4154 ): -1.3506
wdsp ( 4157 ): -1.4652
wdsp ( 4158 ): -1.5034
wdsp ( 4159 ): -1.5034
wdsp ( 4160 ): -1.4525
wdsp ( 4161 ): -1.1086
wdsp ( 4162 ): -1.1341
wdsp ( 4175 ): -0.7647
wdsp ( 4176 ): -0.943
wdsp ( 4185 ): -1.6053
wdsp ( 4197 ): -1.0577
wdsp ( 4198 ): 

wdsp ( 6009 ): -1.4652
wdsp ( 6029 ): -1.1468
wdsp ( 6030 ): -1.5671
wdsp ( 6031 ): -1.5671
wdsp ( 6032 ): -1.5162
wdsp ( 6033 ): -1.5162
wdsp ( 6034 ): -1.3251
wdsp ( 6054 ): -1.4525
wdsp ( 6055 ): -1.5034
wdsp ( 6057 ): -1.4907
wdsp ( 6058 ): -1.0831
wdsp ( 6098 ): -0.5355
wdsp ( 6118 ): -0.7902
wdsp ( 6119 ): -0.2553
wdsp ( 6144 ): -0.8793
wdsp ( 6213 ): -1.2742
wdsp ( 6214 ): -0.9685
wdsp ( 6218 ): -0.8793
wdsp ( 6220 ): -1.0194
wdsp ( 6225 ): -1.2232
wdsp ( 6226 ): -1.2996
wdsp ( 6247 ): -1.5798
wdsp ( 6248 ): -1.5798
wdsp ( 6249 ): -1.5798
wdsp ( 6250 ): -1.0577
wdsp ( 6270 ): -1.2742
wdsp ( 6272 ): -1.4907
wdsp ( 6273 ): -1.5034
wdsp ( 6274 ): -1.3124
wdsp ( 6297 ): -1.5798
wdsp ( 6298 ): -1.0194
wdsp ( 6316 ): -0.8921
wdsp ( 6317 ): -0.752
wdsp ( 6318 ): -1.1978
wdsp ( 6319 ): -1.185
wdsp ( 6334 ): 0.4197
wdsp ( 6335 ): -0.5992
wdsp ( 6344 ): -0.9048
wdsp ( 6369 ): -0.7902
wdsp ( 6417 ): -0.8793
wdsp ( 6418 ): -0.8921
wdsp ( 6465 ): -1.3506
wdsp ( 6480 ): -1.0704
wdsp ( 6486 ):

gust ( 3321 ): -1.4388
gust ( 3370 ): -1.3158
gust ( 3414 ): -1.3861
gust ( 3415 ): -1.1751
gust ( 3532 ): -1.3333
gust ( 3655 ): -1.3275
gust ( 3702 ): -1.5033
gust ( 3703 ): -1.1927
gust ( 3704 ): -1.4505
gust ( 3705 ): -0.9173
gust ( 3706 ): -1.1224
gust ( 3730 ): -0.8822
gust ( 3743 ): -0.9935
gust ( 3747 ): -1.1869
gust ( 3774 ): -1.4564
gust ( 3775 ): -1.3626
gust ( 3776 ): -1.2806
gust ( 3777 ): -1.5326
gust ( 3844 ): -1.3451
gust ( 3897 ): -1.3568
gust ( 3911 ): -1.4681
gust ( 3934 ): -1.1751
gust ( 3941 ): -1.5794
gust ( 3942 ): -1.638
gust ( 3961 ): -1.3744
gust ( 3962 ): -1.2044
gust ( 3963 ): -1.4095
gust ( 4017 ): -1.4798
gust ( 4085 ): -1.1341
gust ( 4114 ): -0.9173
gust ( 4128 ): -1.4681
gust ( 4133 ): -1.2337
gust ( 4136 ): -1.4681
gust ( 4137 ): -1.2103
gust ( 4160 ): -1.474
gust ( 4161 ): -1.3861
gust ( 4162 ): -0.9056
gust ( 4176 ): -1.2689
gust ( 4246 ): -1.4974
gust ( 4247 ): -1.4154
gust ( 4249 ): -1.263
gust ( 4253 ): -1.3216
gust ( 4256 ): -1.5911
gust ( 4257 ):

In [34]:
#refresh data on second page

df_norm = standartize_mean_std(df)

df_norm_lr = LR_fillna(df_norm) 
df_norm_lr = unstandartize_mean_std(df_norm_lr)

checkPrediction(df_norm_lr)

prcp
stp


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


smax
smin
gbrd
temp
dewp
tmax
dmax
tmin
dmin
hmdy
hmax
hmin
wdsp
wdct
gust
0.05591471719506286


In [38]:
#refresh data on second page

df_mm = standartize_minmax(df)

df_mm_lr = LR_fillna(df_mm) 
df_mm_lr = unstandartize_minmax(df_mm_lr)

checkPrediction(df_mm_lr)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


0.057741817745349436


In [39]:
#refresh data on second page

df_mm = standartize_minmax(df)

df_mm_mean = mean_fillna(df_mm, headers[:-69]) 
df_mm_mean = unstandartize_minmax(df_mm_mean)

checkPrediction(df_mm_mean)

0.05586768610083803


In [4]:
#refresh data on second page

df_mm = standartize_minmax(df)

df_mm_median = median_fillna(df_mm, headers[:-69]) 
df_mm_median = unstandartize_minmax(df_mm_median)

checkPrediction(df_mm_median)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


0.06280026620856213


Unnamed: 0,prcp,stp,smax,smin,gbrd,temp,dewp,tmax,dmax,tmin,...,hr_14,hr_15,hr_16,hr_17,hr_18,hr_19,hr_20,hr_21,hr_22,hr_23
0,0.0,982.5,982.5,981.3,0.000,29.3,12.1,29.7,16.8,25.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,983.2,983.2,982.5,0.000,29.0,13.5,29.9,13.6,29.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,983.5,983.5,983.2,0.000,27.4,14.0,29.0,14.0,27.4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,983.7,983.7,983.4,0.000,25.8,16.9,27.4,16.9,25.8,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,983.7,983.8,983.6,0.000,25.4,16.4,26.3,17.0,25.3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,983.7,983.8,983.6,0.000,23.8,16.2,25.4,16.4,23.8,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,983.7,983.7,983.6,0.000,22.0,16.7,23.8,16.7,22.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,984.6,984.6,983.7,0.000,19.7,17.4,22.0,17.8,19.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,985.7,985.7,984.6,0.000,18.3,17.3,19.7,17.3,18.3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,986.7,986.7,985.7,214.149,22.9,18.3,22.9,18.3,18.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,prcp,stp,smax,smin,gbrd,temp,dewp,tmax,dmax,tmin,...,hr_14,hr_15,hr_16,hr_17,hr_18,hr_19,hr_20,hr_21,hr_22,hr_23
count,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0,...,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0,8643.0
mean,0.002621,0.561388,0.569502,0.554682,0.270823,0.567633,0.550327,0.568938,0.509244,0.562196,...,0.048247,0.048131,0.048131,0.048131,0.048131,0.048131,0.0479,0.046049,0.042809,0.038065
std,0.029006,0.175685,0.175092,0.176322,0.316553,0.16551,0.133494,0.166324,0.151758,0.165846,...,0.2143,0.214056,0.214056,0.214056,0.214056,0.214056,0.213567,0.209603,0.202438,0.191365
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.4375,0.450704,0.433566,0.0,0.442478,0.4625,0.436681,0.404878,0.43379,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.569444,0.577465,0.566434,0.10657,0.566372,0.566667,0.567686,0.521951,0.557078,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.6875,0.697183,0.685315,0.550462,0.690265,0.6375,0.694323,0.604878,0.680365,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [172]:
df.to_csv(path_or_buf="lr_predicted_m_std.csv", sep='\t', na_rep='', header=True)