### Import dependencies

In [2]:
import pandas as pd
import numpy as np

### Import dataset

In [3]:
# Read the fertility CSV through a binary file handle.
with open("data/Fecondità.csv", "rb") as csv_file:
    dataset = pd.read_csv(csv_file)

# Order rows by province and then by year: the proximity matrix is built
# with the provinces in alphabetical order.
dataset = dataset.sort_values(by=["Territorio", "TIME"])

dataset

Unnamed: 0,Territorio,TIME,età.media.delle.madri.al.parto,età.media.dei.padri.alla.nascita.del.figlio,tasso.di.fecondità.totale,tasso.di.inattività.dei.maschi,tasso.di.inattività.delle.femmine,tasso.di.inattività.totale,tasso.di.occupazione.dei.maschi,tasso.di.occupazione.delle.femmine,tasso.di.occupazione.totale
930,Agrigento,2011,30.94,34.80,1.37,42.911800,75.561168,59.924924,47.392418,19.828757,33.029603
931,Agrigento,2012,30.73,34.57,1.35,43.934839,74.366121,59.780301,45.282895,20.513632,32.385627
932,Agrigento,2013,31.09,34.78,1.27,46.361107,73.567265,60.518499,43.469198,19.889053,31.198656
933,Agrigento,2014,30.99,35.06,1.33,47.098738,72.427712,60.272133,39.673492,20.296449,29.595648
934,Agrigento,2015,31.14,34.97,1.27,45.169256,75.916130,61.148536,40.057236,18.153835,28.674135
...,...,...,...,...,...,...,...,...,...,...,...
625,Viterbo,2016,31.99,35.76,1.23,39.518267,60.635768,50.408778,52.769263,32.263986,42.194134
626,Viterbo,2017,32.17,35.73,1.16,41.158020,61.241767,51.511845,51.680541,33.283508,42.195912
627,Viterbo,2018,32.05,35.64,1.20,42.945825,61.820030,52.659989,50.508888,33.458828,41.733539
628,Viterbo,2019,31.97,35.51,1.11,44.935350,63.118101,54.287825,49.777042,32.974065,41.133933


### Get response

In [5]:
# Pooled response: total fertility rate over every row of the dataset.
Y = np.array(dataset["tasso.di.fecondità.totale"])

print(np.var(Y))
print(np.mean(Y))

# Build one standardized response series per province. np.unique returns the
# province names sorted, so the rows of `y` follow that sorted order.
y = []
for prov in np.unique(dataset["Territorio"]):
    data_prov = dataset[dataset["Territorio"] == prov]
    y_tmp = np.array(data_prov["tasso.di.fecondità.totale"])
    # Standardize within the province (population std, ddof=0); drop this line
    # to keep the response on its original scale.
    # NOTE(review): a province whose values are all equal has std 0 and would
    # produce NaN here.
    y_tmp = (y_tmp - np.mean(y_tmp)) / np.std(y_tmp)
    y.append(y_tmp)
# Stacking into a 2-D array requires every province to have the same number
# of rows.
y = np.array(y)
print(y)
print(y.shape)

0.015125712353150588
1.3187169811320756
[[ 1.86310328  1.37281295 -0.58834841 ... -0.09805807 -1.32378391
  -0.83349357]
 [ 1.3764944   1.23310957  0.37280057 ... -0.7742781  -1.49120227
  -1.77797194]
 [ 1.44853442  1.32577726  0.46647719 ... -1.00660866 -1.37488013
  -1.37488013]
 ...
 [-0.15041421  2.25621314 -1.35372788 ...  0.15041421 -1.05289947
   0.45124263]
 [ 1.25685463  1.55971117  0.65114155 ... -0.56028459 -1.46885421
  -1.31742594]
 [ 1.33591282  1.33591282  0.22265214 ... -0.19482062 -1.44723888
  -1.72555405]]
(106, 10)


### Get covariates

In [7]:
# Covariate columns, listed in the order they occupy the design matrix.
covariate_columns = [
    "età.media.delle.madri.al.parto",
    "tasso.di.occupazione.delle.femmine",
    "tasso.di.inattività.delle.femmine",
    "tasso.di.occupazione.dei.maschi",
    "tasso.di.inattività.dei.maschi",
    "età.media.dei.padri.alla.nascita.del.figlio",
]

P = len(covariate_columns)

# Derive the panel dimensions from the data instead of hard-coding them.
rows_per_prov = dataset["Territorio"].value_counts()
if rows_per_prov.nunique() != 1:
    raise ValueError(
        "every province must have the same number of rows to reshape the "
        "covariates into (province, time, covariate)"
    )
n_prov = len(rows_per_prov)
n_times = int(rows_per_prov.iloc[0])
n_obs = len(dataset)

# Standardize each covariate over the whole dataset (population std, ddof=0)
# and store it as one column of X.
X = np.ones((n_obs, P))
for k, column in enumerate(covariate_columns):
    values = np.array(dataset[column])
    X[:, k] = (values - np.mean(values)) / np.std(values)

# Keep the individual standardized covariates available under their
# historical names in case other cells refer to them.
X1, X2, X3, X4, X5, X6 = X.T.copy()

# Split the flat rows into consecutive chunks of n_times rows.
# NOTE(review): this groups rows by province only if `dataset` is sorted by
# province -- confirm the sort is applied before this cell runs.
X_r = np.reshape(X, (n_prov, n_times, P))

X_r.shape

(106, 10, 6)

### Linear regression model

In [12]:
from sklearn.linear_model import LinearRegression

# Fit one independent linear regression per slice: models[i] is trained on
# the covariates X_r[i] and the response y[i], for the first 106 slices.
models = [LinearRegression().fit(X_r[i], y[i]) for i in range(106)]

### Beta_ols calculation

In [13]:
# Collect the fitted coefficients of every model: each row is
# [intercept, coef_1, ..., coef_P].
betaols = np.array([
    np.concatenate(([model.intercept_], model.coef_))
    for model in models
])

print(betaols.shape)

# Average coefficient vector across all models. Computed with ndarray.mean so
# that the builtin `sum` is not overwritten by an array for the rest of the
# notebook.
mean = betaols.mean(axis=0)
print(mean)

(106, 7)
[ 0.22190255 -1.4776945   0.10458943 -0.3656493  -0.10999949 -0.58602544
  0.1946157 ]


### Estimate of sigma for each model

In [15]:
# Residual sum of squares of each fitted model on its own training slice.
rss = np.array([
    np.sum((models[i].predict(X_r[i]) - y[i]) ** 2)
    for i in range(106)
])

# Scale by 10 - (P + 1): ten observations per model minus the P slopes and
# the intercept.
# TODO: confirm the number of degrees of freedom to use here.
sigma2_hat = rss / (10 - (P + 1))
print(np.mean(sigma2_hat))

# Move from the variance estimate to the standard-deviation scale.
sigma_hat = np.sqrt(sigma2_hat)
print(sigma_hat)
np.mean(sigma_hat)

0.4061680563322391
[1.05040143 0.51189058 0.17255555 0.85188948 0.37091199 0.96572275
 0.95443362 0.67823846 0.84671775 0.4409707  1.07452845 0.51349011
 0.41649899 0.45086285 0.06155165 0.31658969 0.93827674 0.61046933
 0.77131475 0.4746582  0.35114254 0.76771972 0.34996068 0.52543628
 1.46383736 0.34481922 1.34133188 0.43303864 0.32661697 0.60512518
 0.83680729 0.26286305 0.39658552 0.42641028 0.7991152  0.32633022
 0.68664237 1.15006024 0.92331927 0.87213975 0.3707225  1.28357197
 0.41630002 0.41371192 0.56841558 0.25726729 0.31596498 0.368403
 0.29884462 0.25854946 0.39414602 0.87879691 0.60017411 0.23401992
 0.23819945 0.52528963 0.19044605 0.55401047 0.44520307 0.7452624
 0.04489841 0.56100515 0.28138466 0.2842623  0.1346328  0.19195227
 0.65189578 0.89268004 0.30364632 0.25904023 0.39363436 1.58689513
 0.44443119 1.16263733 0.30052493 0.722512   0.23732558 0.7056325
 0.30283321 0.09966831 0.18663427 0.2329638  0.81046785 0.6890651
 0.57735688 0.29343456 0.138419   0.98331038 0.4

0.5507728012634348