In [54]:
import pandas as pd
import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.discrete.discrete_model import Logit
import matplotlib.pyplot as plt

In [3]:
# Params and dates studied
params = ['HR_AVG_1.5m','TA_AVG_0.1m','PP_SUM_1.5m']
begin = '01/05/2022'
end = '01/09/2022'

In [3]:
DF = pd.read_json('https://servizos.meteogalicia.gal/mgrss/observacion/datosMensuaisEstacionsMeteo.action?idParam='+params[0]+','+params[1]+','+params[2]+'&dataIni='+begin+'&dataFin='+end)

In [4]:
# Export XY coordinates of meteorological stations
pd.DataFrame(DF.iloc[0][0]['listaEstacions'])[['estacion','idEstacion','utmx','utmy']].to_csv('Estaciones_XY.csv')

In [5]:
# Create a DataFrame with values of each parameter
df_data = []

for k in range(len(DF)): #Iteration over 12 months
    for i in range(len(pd.DataFrame(DF.iloc[k][0]['listaEstacions'])['listaMedidas'])): #Iteration over 155 stations
        
        date = DF.iloc[k][0]['data'][:7]
        idEst = DF.iloc[k][0]['listaEstacions'][i]['idEstacion']
        nameEst = DF.iloc[k][0]['listaEstacions'][i]['estacion']
        
        if (len(pd.DataFrame(DF.iloc[k][0]['listaEstacions'])['listaMedidas'].iloc[i]) == 3): # Take only data of stations that have Temp, Prec  HR
            if ((pd.DataFrame(DF.iloc[k][0]['listaEstacions'])['listaMedidas'].iloc[i][1]['valor'] > 0) 
                & (pd.DataFrame(DF.iloc[k][0]['listaEstacions'])['listaMedidas'].iloc[i][2]['valor'] > -9990)
                & (pd.DataFrame(DF.iloc[k][0]['listaEstacions'])['listaMedidas'].iloc[i][0]['valor'] > 0)): #Take only data without measurement errors
                
                df_data.append([date, idEst, nameEst, 
                                pd.DataFrame(DF.iloc[k][0]['listaEstacions'])['listaMedidas'].iloc[i][0]['valor'], 
                                pd.DataFrame(DF.iloc[k][0]['listaEstacions'])['listaMedidas'].iloc[i][1]['valor'],
                                pd.DataFrame(DF.iloc[k][0]['listaEstacions'])['listaMedidas'].iloc[i][2]['valor']
                               ])
            
df_data = pd.DataFrame(df_data)
df_data.columns = ['date','id','name','RH_AVG_1.5','PP_SUM','TA_AVG_0.1m']

In [82]:
# Export aggregated parameters (RH and TA = aggregated by mean) (PP aggregated by sum)
df_data.groupby('id').mean().iloc[:,[0,2]].to_csv('RH_n_TA_AVG.csv')
prec = pd.DataFrame(df_data.groupby('id').sum().iloc[:,1])
prec['PP_SUM'] = prec['PP_SUM']/4
prec.to_csv('PP_sum.csv')

In [7]:
df_data.describe()

Unnamed: 0,id,RH_AVG_1.5,PP_SUM,TA_AVG_0.1m
count,540.0,540.0,540.0,540.0
mean,13717.357407,76.681481,49.676667,19.13787
std,4380.81917,9.081901,39.557319,3.264911
min,10045.0,37.0,0.3,10.32
25%,10105.0,71.0,15.75,16.89
50%,10162.0,78.0,43.4,18.765
75%,19026.0,83.0,73.8,21.4325
max,19074.0,95.0,251.8,27.76


In [83]:
DF = pd.read_excel('BurneArea_RegressionData.xls').iloc[:,[2, 10, 15, 20, 25]].dropna()
DF.columns = ['BurnedArea','MeanTemp','MeanRH','MeanPrec','MeanSlope']
DF['Constant'] = np.ones(len(DF))

lin_model = OLS(DF['BurnedArea'], DF[['MeanTemp','MeanRH','MeanPrec','MeanSlope']]).fit()

lin_model.summary()

0,1,2,3
Dep. Variable:,BurnedArea,R-squared (uncentered):,0.491
Model:,OLS,Adj. R-squared (uncentered):,0.468
Method:,Least Squares,F-statistic:,21.92
Date:,"Tue, 04 Oct 2022",Prob (F-statistic):,1.09e-12
Time:,21:42:22,Log-Likelihood:,-910.89
No. Observations:,95,AIC:,1830.0
Df Residuals:,91,BIC:,1840.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MeanTemp,-175.2356,199.264,-0.879,0.381,-571.049,220.578
MeanRH,65.9213,67.246,0.980,0.330,-67.654,199.496
MeanPrec,-23.0628,9.852,-2.341,0.021,-42.632,-3.493
MeanSlope,301.6451,45.059,6.695,0.000,212.142,391.148

0,1,2,3
Omnibus:,10.743,Durbin-Watson:,2.409
Prob(Omnibus):,0.005,Jarque-Bera (JB):,10.88
Skew:,0.746,Prob(JB):,0.00434
Kurtosis:,3.724,Cond. No.,144.0


In [49]:
lin_model = OLS(DF['BurnedArea'], DF[['MeanRH']]).fit()

lin_model.summary()

0,1,2,3
Dep. Variable:,BurnedArea,R-squared (uncentered):,0.185
Model:,OLS,Adj. R-squared (uncentered):,0.176
Method:,Least Squares,F-statistic:,21.3
Date:,"Tue, 04 Oct 2022",Prob (F-statistic):,1.24e-05
Time:,20:43:01,Log-Likelihood:,-933.24
No. Observations:,95,AIC:,1868.0
Df Residuals:,94,BIC:,1871.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MeanRH,28.6578,6.209,4.615,0.000,16.329,40.986

0,1,2,3
Omnibus:,39.505,Durbin-Watson:,2.172
Prob(Omnibus):,0.0,Jarque-Bera (JB):,69.05
Skew:,1.878,Prob(JB):,1.01e-15
Kurtosis:,4.828,Cond. No.,1.0


In [50]:
lin_model = OLS(DF['BurnedArea'], DF[['MeanPrec']]).fit()

lin_model.summary()

0,1,2,3
Dep. Variable:,BurnedArea,R-squared (uncentered):,0.156
Model:,OLS,Adj. R-squared (uncentered):,0.147
Method:,Least Squares,F-statistic:,17.37
Date:,"Tue, 04 Oct 2022",Prob (F-statistic):,6.82e-05
Time:,20:43:01,Log-Likelihood:,-934.88
No. Observations:,95,AIC:,1872.0
Df Residuals:,94,BIC:,1874.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MeanPrec,8.0301,1.926,4.168,0.000,4.205,11.855

0,1,2,3
Omnibus:,38.894,Durbin-Watson:,2.097
Prob(Omnibus):,0.0,Jarque-Bera (JB):,67.218
Skew:,1.859,Prob(JB):,2.53e-15
Kurtosis:,4.779,Cond. No.,1.0


In [51]:
lin_model = OLS(DF['BurnedArea'], DF[['MeanSlope']]).fit()

lin_model.summary()

0,1,2,3
Dep. Variable:,BurnedArea,R-squared (uncentered):,0.367
Model:,OLS,Adj. R-squared (uncentered):,0.36
Method:,Least Squares,F-statistic:,54.44
Date:,"Tue, 04 Oct 2022",Prob (F-statistic):,6.28e-11
Time:,20:43:02,Log-Likelihood:,-921.24
No. Observations:,95,AIC:,1844.0
Df Residuals:,94,BIC:,1847.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
MeanSlope,134.1136,18.176,7.379,0.000,98.025,170.202

0,1,2,3
Omnibus:,33.496,Durbin-Watson:,2.244
Prob(Omnibus):,0.0,Jarque-Bera (JB):,52.64
Skew:,1.66,Prob(JB):,3.71e-12
Kurtosis:,4.508,Cond. No.,1.0


In [66]:
DF.describe()

Unnamed: 0,BurnedArea,MeanTemp,MeanRH,MeanPrec,MeanSlope,Constant
count,95.0,95.0,95.0,95.0,95.0,95.0
mean,2271.031579,19.459339,73.958497,237.751453,20.773072,1.0
std,4419.316435,1.039421,6.162655,52.26464,8.271959,0.0
min,0.0,16.946964,59.4668,161.989716,6.998628,1.0
25%,11.0,18.85707,69.855628,198.070433,15.347996,1.0
50%,163.0,19.539299,74.178457,222.059969,20.436066,1.0
75%,926.5,20.121272,78.361854,272.611179,23.575485,1.0
max,14175.0,21.446843,90.088316,390.224813,45.989616,1.0


In [67]:
DF.corr()

Unnamed: 0,BurnedArea,MeanTemp,MeanRH,MeanPrec,MeanSlope,Constant
BurnedArea,1.0,-0.157235,-0.375948,-0.281468,0.546745,
MeanTemp,-0.157235,1.0,-0.356268,-0.250347,-0.096398,
MeanRH,-0.375948,-0.356268,1.0,0.696091,-0.408135,
MeanPrec,-0.281468,-0.250347,0.696091,1.0,-0.105249,
MeanSlope,0.546745,-0.096398,-0.408135,-0.105249,1.0,
Constant,,,,,,


In [73]:
DF[DF['BurnedArea'] > 926].mean()

BurnedArea    8506.375000
MeanTemp        19.273631
MeanRH          70.283004
MeanPrec       222.168830
MeanSlope       25.780672
Constant         1.000000
dtype: float64

In [72]:
DF[(DF['BurnedArea'] > 163)&(DF['BurnedArea'] <= 926)].mean()

BurnedArea    441.956522
MeanTemp       19.663330
MeanRH         73.391101
MeanPrec      233.892772
MeanSlope      19.204241
Constant        1.000000
dtype: float64

In [75]:
DF[(DF['BurnedArea'] > 11)&(DF['BurnedArea'] <=163)].mean()

BurnedArea     56.695652
MeanTemp       19.607423
MeanRH         74.508371
MeanPrec      232.718663
MeanSlope      19.917832
Constant        1.000000
dtype: float64

In [76]:
DF[DF['BurnedArea'] < 11].mean()

BurnedArea      4.521739
MeanTemp       19.202929
MeanRH         78.174720
MeanPrec      265.743167
MeanSlope      18.069657
Constant        1.000000
dtype: float64