# Tariffazione con i GLM
## Statistica Assicurativa
### _Leonardo Stincone, 10/02/2020_

In [1]:
/* Percorso in cui si trovano i dati */
libname dati '/folders/myfolders/data';

/* Parametri grafici */
ods graphics on / width = 10cm height = 8cm;

/* Carico il dataset con le polizze */
data polizze;
    set dati.polizze;
run;

data polizze;
    set polizze;
    freqsin = nsin / espo;
run;

/* Creo un dataset con le sole polizze sinistrate */
data polizze_sin;
    set polizze;
    where nsin > 0;
    dannomedio = dannotot / nsin;
run;

proc print data = polizze (obs=10) round;
run;

SAS Connection established. Subprocess id is 2476



Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,espo,nsin,dannotot,freqsin
1,F,35,MI,NO,B,18,620,10,1.0,0,0.0,0
2,M,37,AP,NO,B,37,790,12,1.0,0,0.0,0
3,M,45,LE,NO,B,63,850,15,1.0,0,0.0,0
4,F,21,PS,NO,B,34,710,13,1.0,0,0.0,0
5,M,48,SP,NO,B,33,770,12,0.2,0,0.0,0
6,F,45,TO,NO,B,44,780,14,1.0,0,0.0,0
7,M,35,PG,NO,D,51,1035,19,1.0,0,0.0,0
8,M,70,PG,SI,B,65,970,15,1.0,0,0.0,0
9,F,46,AR,NO,B,61,820,15,1.0,0,0.0,0
10,M,55,AR,NO,B,80,1180,18,1.0,1,359.06,1


## 1) Analisi preliminari

In [2]:
proc means data = polizze nway noprint;
    var espo nsin dannotot;
    output out = polizzeMeans sum = totespo totnsin totdannotot;
run;

data polizzeMeans;
    set polizzeMeans;
    
    totfreqsin = totnsin / totespo;
    if totnsin > 0 then totdannomed = totdannotot / totnsin;
        else totdannomed=0;
    totqd = totdannotot / totespo;
    
    drop _TYPE_;
    rename _FREQ_ = totnpol;
    
    format totfreqsin 5.3;
    format totespo 10.2;
    format totdannotot 10.2;
    format totqd 10.2;
run;

proc print data = polizzeMeans noobs round;
    var totfreqsin totdannomed totqd totespo;
run;

totfreqsin,totdannomed,totqd,totespo
0.103,3000,308.83,123282.32


In [3]:
proc means data = polizze maxdec = 2;
    var eta potf potkil massa;
    weight espo;
run;

Variable,Label,N,Mean,Std Dev,Minimum,Maximum
Eta potf Potkil Massa,Eta  Potkil Massa,172161 172161 172161 172161,42.64 14.85 51.54 920.92,12.20 2.66 19.79 169.66,18.00 8.00 17.00 555.00,95.00 41.00 362.00 2240.00


### `1. Sesso`

In [4]:
proc means data = polizze nway noprint;
    class sesso;
    var espo nsin dannotot;
    output out = polizzebysesso sum = totespo totnsin totdannotot;
run;

data polizzebysesso;
    set polizzebysesso;
    
    totfreqsin = totnsin / totespo;
    if totnsin > 0 then totdannomed = totdannotot / totnsin;
        else totdannomed=0;
    totqd = totdannotot / totespo;
    
    drop _TYPE_;
    rename _FREQ_ = totnpol;
    
    format totfreqsin 5.3;
    format totespo 10.2;
    format totdannotot 10.2;
    format totqd 10.2;
run;

proc print data = polizzebysesso noobs;
    var sesso totfreqsin totdannomed totqd totespo;
run;

Sesso,totfreqsin,totdannomed,totqd,totespo
F,0.098,2758.37,269.97,39735.73
M,0.105,3106.76,327.31,83546.59


In [5]:
proc sgplot data=polizze;
    vline sesso / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05;
run;

In [6]:
proc sgplot data=polizze_sin;
    vline sesso / response=dannomedio weight=nsin stat=mean limitstat=clm alpha = .05;
run;

### `2. Capoluogo`

In [7]:
proc means data = polizze nway noprint;
    class capoluogo;
    var espo nsin dannotot;
    output out = polizzebycapoluogo sum = totespo totnsin totdannotot;
run;

data polizzebycapoluogo;
    set polizzebycapoluogo;
    
    totfreqsin = totnsin / totespo;
    if totnsin > 0 then totdannomed = totdannotot / totnsin;
        else totdannomed=0;
    totqd = totdannotot / totespo;
    
    drop _TYPE_;
    rename _FREQ_ = totnpol;
    
    format totfreqsin 5.3;
    format totespo 10.2;
    format totdannotot 10.2;
    format totqd 10.2;
run;

proc print data=polizzebycapoluogo noobs;
    var capoluogo totfreqsin totdannomed totqd totespo;
run;

Capoluogo,totfreqsin,totdannomed,totqd,totespo
NO,0.097,3117.84,301.35,101269.74
SI,0.132,2602.67,343.24,22012.58


In [8]:
proc sgplot data=polizze;
    vline capoluogo / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05;
run;

In [9]:
proc sgplot data=polizze_sin;
    vline capoluogo / response=dannomedio weight=nsin stat=mean limitstat=clm alpha = .05;
run;

### `3. Bendie`

In [10]:
proc means data = polizze nway noprint;
    class bendie;
    var espo nsin dannotot;
    output out=polizzebybendie sum=totespo totnsin totdannotot;
run;

data polizzebybendie;
    set polizzebybendie;
    
    totfreqsin = totnsin / totespo;
    if totnsin > 0 then totdannomed = totdannotot / totnsin;
        else totdannomed=0;
    totqd = totdannotot / totespo;
    
    drop _TYPE_;
    rename _FREQ_ = totnpol;
    
    format totfreqsin 5.3;
    format totespo 10.2;
    format totdannotot 10.2;
    format totqd 10.2;
run;

proc print data = polizzebybendie noobs;
    var bendie totfreqsin totdannomed totqd totespo;
run;

Bendie,totfreqsin,totdannomed,totqd,totespo
B,0.099,3044.79,300.6,114456.64
D,0.158,2636.14,415.48,8825.68


In [11]:
proc sgplot data=polizze;
    vline bendie / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05;
run;

In [12]:
proc sgplot data=polizze_sin;
    vline bendie / response=dannomedio weight=nsin stat=mean limitstat=clm alpha = .05;
run;

### `4. Prov`

In [13]:
proc means data = polizze nway noprint;
    class prov;
    var espo nsin dannotot;
    output out = polizzebyprov sum = totespo totnsin totdannotot;
run;

data polizzebyprov;
    set polizzebyprov;
    
    totfreqsin = totnsin / totespo;
    if totnsin > 0 then totdannomed = totdannotot / totnsin;
        else totdannomed=0;
    totqd = totdannotot / totespo;
    
    drop _TYPE_;
    rename _FREQ_ = totnpol;
    
    format totfreqsin 5.3;
    format totespo 10.2;
    format totdannotot 10.2;
    format totqd 10.2;
run;

proc sort data = polizzebyprov;
    by descending totfreqsin;
run;

proc print data = polizzebyprov (obs = 10);
    var prov totfreqsin totdannomed totqd totespo;
run;

Obs,Prov,totfreqsin,totdannomed,totqd,totespo
1,KR,0.247,3052.88,753.71,8.1
2,,0.166,2255.6,374.41,2138.67
3,AO,0.165,1607.49,265.26,145.44
4,CE,0.162,1999.7,323.63,1211.09
5,ROMA,0.146,2239.97,326.51,617.43
6,CA,0.145,2485.66,359.45,567.04
7,SP,0.142,3161.38,449.8,611.48
8,PT,0.142,3019.11,427.32,847.82
9,TA,0.141,1735.74,244.96,524.35
10,PA,0.14,2130.07,298.28,692.7


In [14]:
/* Parametri grafici */
ods graphics on / width = 16cm height = 10cm;

proc sgplot data=polizze;
    vline prov / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05
                 categoryorder=respdesc;
    yaxis min = 0.05 max = 0.25;
run;

### `5. eta`

In [15]:
proc means data = polizze nway noprint;
    class eta;
    var espo nsin dannotot;
    output out = polizzebyeta sum = totespo totnsin totdannotot;
run;

data polizzebyeta;
    set polizzebyeta;
    
    totfreqsin = totnsin / totespo;
    if totnsin > 0 then totdannomed = totdannotot / totnsin;
        else totdannomed=0;
    totqd = totdannotot / totespo;
    
    drop _TYPE_;
    rename _FREQ_ = totnpol;
    
    format totfreqsin 5.3;
    format totespo 10.2;
    format totdannotot 10.2;
    format totqd 10.2;
run;

proc print data = polizzebyeta (obs=10) noobs;
    var eta totfreqsin totdannomed totqd totespo;
run;

Eta,totfreqsin,totdannomed,totqd,totespo
18,0.253,2816.35,711.71,91.01
19,0.19,6995.11,1332.43,593.24
20,0.208,3387.97,703.64,1266.33
21,0.158,3988.27,629.14,1939.81
22,0.174,2767.0,482.41,2156.65
23,0.141,4512.06,636.44,2566.41
24,0.144,2912.02,417.98,2724.07
25,0.129,3168.98,408.31,2832.83
26,0.129,3389.06,437.45,2974.93
27,0.108,2897.5,313.58,3132.37


In [16]:
proc sgplot data=polizze;
    vline eta / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05;
    yaxis min = 0.05 max = 0.30;
run;

In [17]:
proc sgplot data=polizze_sin;
    vline eta / response=dannomedio weight=nsin stat=mean limitstat=clm alpha = .05;
    yaxis min = 0 max = 8000;
run;

## 2) Cluster Analysis

### `2.1. Prov`

In [18]:
proc cluster method = ward data = polizzebyprov outtree = clusterprov print = 5;
    id prov;
    var totfreqsin;
    freq totespo;
    copy totespo totnsin totdannotot totdannomed;
run;

Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix
Unnamed: 0_level_1,Eigenvalue,Difference,Proportion,Cumulative
1,0.0004035,,1.0,1.0

0,1
Root-Mean-Square Total-Sample Standard Deviation,0.020087

0,1
Root-Mean-Square Distance Between Observations,0.028408

Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History
Number of Clusters,Clusters Joined,Clusters Joined.1,Freq,Semipartial R-Square,R-Square,Tie
5,CL16,CL10,9515,0.0285,0.917,
4,CL11,CL8,52015,0.0528,0.864,
3,CL6,CL7,61702,0.0656,0.798,
2,CL5,CL4,61530,0.208,0.59,
1,CL2,CL3,123232,0.5902,0.0,


In [19]:
/* clusterprov contiene una riga per ogni passo della procedura di clustering */
proc print data = clusterprov (obs = 10);
run;

Obs,_NAME_,_PARENT_,_NCL_,_FREQ_,_HEIGHT_,_RMSSTD_,_SPRSQ_,_RSQ_,_PSF_,_PST2_,_ERSQ_,_RATIO_,_LOGR_,_CCC_,totfreqsin,_DIST_,_AVLINK_,Prov,totespo,totnsin,totdannotot,totdannomed
1,RSM,,103,0,0,0,0,1,.,.,.,.,.,.,0.0,0,0,RSM,0.04,0,0.0,0.0
2,VT,CL102,103,944,0,0,0,1,.,.,.,.,.,.,0.09,0,0,VT,944.67,85,157469.91,1852.59
3,RC,CL102,103,355,0,0,0,1,.,.,.,.,.,.,0.09,0,0,RC,355.66,32,68799.84,2150.0
4,BN,CL101,103,60,0,0,0,1,.,.,.,.,.,.,0.132,0,0,BN,60.39,8,13033.78,1629.22
5,PO,CL101,103,67,0,0,0,1,.,.,.,.,.,.,0.132,0,0,PO,67.96,9,24085.55,2676.17
6,OR,CL100,103,186,0,0,0,1,.,.,.,.,.,.,0.091,0,0,OR,186.91,17,156203.33,9188.43
7,CN,CL100,103,1154,0,0,0,1,.,.,.,.,.,.,0.091,0,0,CN,1154.79,105,277702.8,2644.79
8,VC,CL99,103,1644,0,0,0,1,.,.,.,.,.,.,0.094,0,0,VC,1644.71,154,295996.92,1922.06
9,RA,CL99,103,1303,0,0,0,1,.,.,.,.,.,.,0.094,0,0,RA,1303.27,122,692249.43,5674.18
10,EN,CL98,103,124,0,0,0,1,.,.,.,.,.,.,0.097,0,0,EN,124.29,12,15241.08,1270.09


Con 12 cluster arrivo a $R^2 ≥ 0.99$

In [20]:
proc tree data = clusterprov nclusters = 12 out = prov12cl noprint;
    id prov;
    copy totespo totnsin totdannotot totfreqsin totdannomed;
run;

/* Assegno San Marino al cluster 12, che è quello con la frequenza sinistri più bassa */
data prov12cl;
  set prov12cl;
  if prov='RSM' then cluster=12;
run;

/* prov12cl contiene tante righe quante sono le province e indica di ogni provincia a quale cluster appartiene */
proc print data = prov12cl (obs = 10);
run;

Obs,Prov,totespo,totnsin,totdannotot,totfreqsin,totdannomed,CLUSTER,CLUSNAME
1,RSM,0.04,0,0.0,0.0,0.0,12,
2,VT,944.67,85,157469.91,0.09,1852.59,1,CL14
3,RC,355.66,32,68799.84,0.09,2150.0,1,CL14
4,BN,60.39,8,13033.78,0.132,1629.22,2,CL26
5,PO,67.96,9,24085.55,0.132,2676.17,2,CL26
6,OR,186.91,17,156203.33,0.091,9188.43,1,CL14
7,CN,1154.79,105,277702.8,0.091,2644.79,1,CL14
8,VC,1644.71,154,295996.92,0.094,1922.06,1,CL14
9,RA,1303.27,122,692249.43,0.094,5674.18,1,CL14
10,EN,124.29,12,15241.08,0.097,1270.09,3,CL20


In [21]:
/* polizzebyclusterprov contiene una riga per ogni cluster e indica di ogni cluster una serie di informazioni */
proc means data = prov12cl nway noprint;
    class cluster;
    var totespo totnsin totdannotot;
    output out = polizzebyclusterprov sum = totespocl totnsincl totdannototcl;
run;

data polizzebyclusterprov;
    set polizzebyclusterprov;
    
    totfreqsincl = totnsincl / totespocl;
    if totnsincl > 0 then totdannomedcl = totdannototcl / totnsincl;
        else totdannomedcl = 0;
    totqdcl = totdannototcl / totespocl;
    
    drop _type_;
    rename _FREQ_ = numprov;
    
    format totfreqsincl 5.3;
    format totespocl 10.2;
    format totdannototcl 10.2;
    format totqdcl 10.2;
run;

proc print data = polizzebyclusterprov;
run;

Obs,CLUSTER,numprov,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,1,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
2,2,4,2158.09,289,537774.02,0.134,1860.81,249.19
3,3,12,12480.92,1215,3369680.21,0.097,2773.4,269.99
4,4,10,18644.35,1567,5068192.03,0.084,3234.33,271.84
5,5,15,15491.65,1603,5149935.33,0.103,3212.69,332.43
6,6,9,12556.48,1378,4973336.43,0.11,3609.1,396.08
7,7,7,6464.18,758,2128488.68,0.117,2808.03,329.27
8,8,6,17521.58,2133,5941180.86,0.122,2785.36,339.08
9,9,6,3860.83,550,1377816.14,0.142,2505.12,356.87
10,10,8,7454.64,540,1906127.5,0.072,3529.87,255.7


In [22]:
proc sort data = prov12cl;
    by cluster;
run;

/* Arricchisco il dataset prov12cl con le informazioni dei cluster a cui ogni provincia appartiene appartengono */
data prov12cl;
    merge prov12cl polizzebyclusterprov;
    by cluster;
run;

proc print data = prov12cl (obs = 10);
run;

Obs,Prov,totespo,totnsin,totdannotot,totfreqsin,totdannomed,CLUSTER,CLUSNAME,numprov,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,VT,944.67,85,157469.91,0.09,1852.59,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
2,RC,355.66,32,68799.84,0.09,2150.0,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
3,OR,186.91,17,156203.33,0.091,9188.43,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
4,CN,1154.79,105,277702.8,0.091,2644.79,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
5,VC,1644.71,154,295996.92,0.094,1922.06,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
6,RA,1303.27,122,692249.43,0.094,5674.18,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
7,LC,119.11,11,24287.52,0.092,2207.96,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
8,FE,964.48,89,207535.55,0.092,2331.86,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
9,SS,531.54,47,169022.57,0.088,3596.22,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25
10,VV,11.36,1,1580.75,0.088,1580.75,1,CL14,19,21730.39,2002,5981240.95,0.092,2987.63,275.25


In [23]:
proc sgplot data = prov12cl;
    scatter x = totfreqsin y = totdannomed / group = cluster markerattrs=(symbol=CircleFilled);
run;

### `2.2. eta`

In [24]:
proc means data = polizze nway noprint;
    class eta;
    var espo nsin dannotot;
    output out = polizzebyeta sum = totespo totnsin totdannotot;
run;

data polizzebyeta;
    set polizzebyeta;
    
    totfreqsin = totnsin/totespo;
    if totnsin>0 then totdannomed = totdannotot/totnsin;
        else totdannomed = 0;
    totqd = totdannotot / totespo;
        
    drop _TYPE_;
    rename _FREQ_ = totnpol;
run;

proc print data = polizzebyeta (obs = 10) noobs;
    var eta totfreqsin totdannomed totqd totespo;
run;

Eta,totfreqsin,totdannomed,totqd,totespo
18,0.25271,2816.35,711.71,91.01
19,0.19048,6995.11,1332.43,593.24
20,0.20769,3387.97,703.64,1266.33
21,0.15775,3988.27,629.14,1939.81
22,0.17434,2767.0,482.41,2156.65
23,0.14105,4512.06,636.44,2566.41
24,0.14353,2912.02,417.98,2724.07
25,0.12885,3168.98,408.31,2832.83
26,0.12908,3389.06,437.45,2974.93
27,0.10822,2897.5,313.58,3132.37


In [25]:
proc sgplot data = polizzebyeta;
    scatter x = eta y = totfreqsin / markerattrs=(symbol=CircleFilled);
    series x = eta y = totfreqsin;
run;

In [26]:
/* Preraggruppamento */
proc format;
    value formateta
        low-22 = "18-22"
        23-24 = "23-24"
        25-26 = "25-26"
        27-30 = "27-30"
        31-34 = "31-34"
        35-43 = "35-43"
        44-51 = "44-51"
        52-60 = "52-60"
        61-64 = "61-64"
        65-69 = "65-69"
        70-81 = "70-81"
        82-high = "82-";
run;

data polizzebyeta;
    set polizzebyeta;
    leveleta = eta;
    format leveleta formateta.;
run;

/* Calcolo le informazioni a livello di ogni gruppo */
proc means data = polizzebyeta nway noprint;
    class leveleta;
    var eta totfreqsin;
    weight totespo;
    output out = polizzebyleveleta1 mean = etamed totfreqsinmed;
run;

proc means data = polizzebyeta nway noprint;
    class leveleta;
    var totespo totnsin totdannotot totnpol;
    output out = polizzebyleveleta2 sum = totespo totnsin totdannotot totnpol;
run;

data polizzebyleveleta;
    merge polizzebyleveleta1 polizzebyleveleta2;
    by leveleta;
    drop _type_ _freq_;
run;

/* Standardizzo eta e freqsin in modo da poterle usare assieme per l'accorpamento */
proc standard data = polizzebyleveleta out = polizzebyleveletastd mean = 0 std = 1;
    weight totespo;
    var etamed totfreqsinmed;
run;

data polizzebyleveletastd;
    set polizzebyleveletastd;
    format etamed 8.4;
    format totfreqsinmed 8.4;
run;

data polizzebyleveletastd;
    set polizzebyleveletastd;
    rename totfreqsinmed = totfreqsinmedstd;
    rename etamed = etamedstd;
run;

data polizzebyleveleta;
    merge polizzebyleveleta polizzebyleveletastd;
    by leveleta;
run;

proc print data = polizzebyleveleta;
run;

Obs,leveleta,etamed,totfreqsinmed,totespo,totnsin,totdannotot,totnpol,etamedstd,totfreqsinmedstd
1,18-22,21,0.17877,6047.04,1081,4007061.49,8991,-0.0144,0.0323
2,23-24,24,0.14233,5290.49,753,2771965.77,7542,-0.0127,0.0168
3,25-26,26,0.12897,5807.76,749,2458076.42,8292,-0.0113,0.0111
4,27-30,29,0.10288,13053.9,1343,4436387.56,18671,-0.0093,-0.0
5,31-34,32,0.09152,12992.03,1189,2940408.97,18382,-0.0067,-0.0049
6,35-43,39,0.08305,25371.75,2107,6632792.66,35267,-0.0025,-0.0085
7,44-51,47,0.101,20356.78,2056,5287609.56,28264,0.0031,-0.0008
8,52-60,56,0.10451,17873.66,1868,5358383.58,24606,0.0087,0.0007
9,61-64,62,0.09028,5781.94,522,1467871.43,7813,0.0131,-0.0054
10,65-69,67,0.09161,5414.12,496,1130324.59,7296,0.016,-0.0048


In [27]:
/* Effettuo il clustering usando sia etamedstd che totfreqsinmedstd */
proc cluster method = ward data = polizzebyleveleta outtree = clustereta;
    id leveleta;
    var etamedstd totfreqsinmedstd;
    freq totespo;
    copy totespo totnsin totdannotot etamed totfreqsinmed;
run;

Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix
Unnamed: 0_level_1,Eigenvalue,Difference,Proportion,Cumulative
1,0.00012676,7.507e-05,0.7103,0.7103
2,5.169e-05,,0.2897,1.0

0,1
Root-Mean-Square Total-Sample Standard Deviation,0.009446

0,1
Root-Mean-Square Distance Between Observations,0.018892

Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History
Number of Clusters,Clusters Joined,Clusters Joined.1,Freq,Semipartial R-Square,R-Square,Tie
11,61-64,65-69,11195,0.0011,0.999,
10,70-81,82-,5292,0.0014,0.997,
9,23-24,25-26,11097,0.0043,0.993,
8,27-30,31-34,26045,0.0089,0.984,
7,CL11,CL10,16487,0.009,0.975,
6,44-51,52-60,38229,0.0145,0.961,
5,CL8,35-43,51416,0.0394,0.921,
4,18-22,CL9,17144,0.0621,0.859,
3,CL6,CL7,54716,0.0694,0.79,
2,CL5,CL3,106132,0.2665,0.523,


Con 9 cluster arrivo a $R^2 ≥ 0.99$

In [28]:
proc tree data = clustereta nclusters = 9 out = cluster9eta noprint;
    id leveleta;
    copy totespo totnsin totdannotot etamed totfreqsinmed;
run;

/* cluster9eta contiene tante righe quanti erano i cluster nel preraggruppamento manuale */
proc print data = cluster9eta;
run;

Obs,leveleta,totespo,totnsin,totdannotot,etamed,totfreqsinmed,CLUSTER,CLUSNAME
1,61-64,5781.94,522,1467871.43,62,0.09028,1,CL11
2,65-69,5414.12,496,1130324.59,67,0.09161,1,CL11
3,70-81,4899.85,484,1486592.19,73,0.09878,2,CL10
4,82-,393.0,43,95525.78,86,0.10941,2,CL10
5,23-24,5290.49,753,2771965.77,24,0.14233,3,CL9
6,25-26,5807.76,749,2458076.42,26,0.12897,3,CL9
7,27-30,13053.9,1343,4436387.56,29,0.10288,4,27-30
8,31-34,12992.03,1189,2940408.97,32,0.09152,5,31-34
9,44-51,20356.78,2056,5287609.56,47,0.101,6,44-51
10,52-60,17873.66,1868,5358383.58,56,0.10451,7,52-60


In [29]:
/* polizzebyclustereta contiene una riga per ogni cluster e indica di ogni cluster una serie di informazioni */
proc means data = cluster9eta nway noprint;
    class cluster;
    var totespo totnsin totdannotot;
    output out = polizzebyclustereta sum = totespocl totnsincl totdannototcl;
run;

data polizzebyclustereta;
    set polizzebyclustereta;
    
    totfreqsincl = totnsincl/totespocl;
    if totnsincl>0 then totdannomedcl = totdannototcl/totnsincl;
        else totdannomedcl = 0;
    totqdcl = totdannototcl / totespocl;
    
    drop _type_;
    
    format totfreqsincl 5.3;
    format totespocl 10.2;
    format totdannototcl 10.2;
    format totqdcl 10.2;
run;

proc print data = polizzebyclustereta;
run;

Obs,CLUSTER,_FREQ_,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,1,2,11196.06,1018,2598196.02,0.091,2552.26,232.06
2,2,2,5292.85,527,1582117.97,0.1,3002.12,298.92
3,3,2,11098.25,1502,5230042.2,0.135,3482.05,471.25
4,4,1,13053.9,1343,4436387.56,0.103,3303.34,339.85
5,5,1,12992.03,1189,2940408.97,0.092,2473.01,226.32
6,6,1,20356.78,2056,5287609.56,0.101,2571.79,259.75
7,7,1,17873.66,1868,5358383.58,0.105,2868.51,299.79
8,8,1,25371.75,2107,6632792.66,0.083,3147.98,261.42
9,9,1,6047.04,1081,4007061.49,0.179,3706.81,662.65


In [30]:
proc sort data = cluster9eta;
    by cluster;
run;

/* Arricchisco il dataset cluster9eta con le informazioni dei cluster a cui ogni gruppo appartiene appartengono */
data cluster9eta;
    merge cluster9eta polizzebyclustereta;
    by cluster;
run;

proc print data = cluster9eta;
run;

Obs,leveleta,totespo,totnsin,totdannotot,etamed,totfreqsinmed,CLUSTER,CLUSNAME,_FREQ_,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,61-64,5781.94,522,1467871.43,62,0.09028,1,CL11,2,11196.06,1018,2598196.02,0.091,2552.26,232.06
2,65-69,5414.12,496,1130324.59,67,0.09161,1,CL11,2,11196.06,1018,2598196.02,0.091,2552.26,232.06
3,70-81,4899.85,484,1486592.19,73,0.09878,2,CL10,2,5292.85,527,1582117.97,0.1,3002.12,298.92
4,82-,393.0,43,95525.78,86,0.10941,2,CL10,2,5292.85,527,1582117.97,0.1,3002.12,298.92
5,23-24,5290.49,753,2771965.77,24,0.14233,3,CL9,2,11098.25,1502,5230042.2,0.135,3482.05,471.25
6,25-26,5807.76,749,2458076.42,26,0.12897,3,CL9,2,11098.25,1502,5230042.2,0.135,3482.05,471.25
7,27-30,13053.9,1343,4436387.56,29,0.10288,4,27-30,1,13053.9,1343,4436387.56,0.103,3303.34,339.85
8,31-34,12992.03,1189,2940408.97,32,0.09152,5,31-34,1,12992.03,1189,2940408.97,0.092,2473.01,226.32
9,44-51,20356.78,2056,5287609.56,47,0.101,6,44-51,1,20356.78,2056,5287609.56,0.101,2571.79,259.75
10,52-60,17873.66,1868,5358383.58,56,0.10451,7,52-60,1,17873.66,1868,5358383.58,0.105,2868.51,299.79


### `2.3 potf`

In [31]:
proc means data = polizze nway noprint;
    class potf;
    var espo nsin dannotot;
    output out=polizzebypotf sum=totespo totnsin totdannotot;
run;

data polizzebypotf;
    set polizzebypotf;
    
    totfreqsin = totnsin / totespo;
    if totnsin > 0 then totdannomed = totdannotot / totnsin;
        else totdannomed = 0;
    totqd = totdannotot / totespo;
        
    drop _TYPE_;
    rename _FREQ_ = totnpol;
run;

proc print data = polizzebypotf (obs = 10) noobs;
    var potf totfreqsin totdannomed totqd totespo;
run;

potf,totfreqsin,totdannomed,totqd,totespo
8,0.05539,1944.0,107.686,108.32
9,0.0735,2652.59,194.962,462.59
10,0.08866,2521.23,223.52,8798.13
11,0.0761,1693.54,128.886,604.43
12,0.0895,2387.18,213.641,23710.77
13,0.09886,2865.89,283.325,18773.85
14,0.10245,3726.9,381.824,9985.27
15,0.11341,2712.68,307.646,14425.51
16,0.0997,3201.22,319.15,6319.19
17,0.10883,3172.22,345.247,13543.5


In [32]:
proc sgplot data = polizzebypotf;
    scatter x = potf y = totfreqsin / markerattrs=(symbol=CircleFilled);
    series x = potf y = totfreqsin;
run;

In [33]:
/* Preraggruppamento */
proc format;
    value formatpotf
        low-13 = "8-13"
        14-15 = "14-15"
        16 = "16"
        17-21 = "17-21"
        22-23 = "22-23"
        24-26 = "24-26"
        27-28 = "27-28"
        29-30 = "29-30"
        31-high = "31-";
run;

data polizzebypotf;
    set polizzebypotf;
    levelpotf = potf;
    format levelpotf formatpotf.;
run;

/* Calcolo le informazioni a livello di ogni gruppo */
proc means data = polizzebypotf nway noprint;
    class levelpotf;
    var potf totfreqsin;
    weight totespo;
    output out = polizzebylevelpotf1 mean = potfmed totfreqsinmed;
run;

proc means data = polizzebypotf nway noprint;
    class levelpotf;
    var totespo totnsin totdannotot totnpol;
    output out = polizzebylevelpotf2 sum = totespo totnsin totdannotot totnpol;
run;

data polizzebylevelpotf;
    merge polizzebylevelpotf1 polizzebylevelpotf2;
    by levelpotf;
    drop _type_ _freq_;
run;

/* Standardizzo potf e freqsin in modo da poterle usare assieme per l'accorpamento */
proc standard data = polizzebylevelpotf out = polizzebylevelpotfstd mean = 0 std = 1;
    weight totespo;
    var potfmed totfreqsinmed;
run;

data polizzebylevelpotfstd;
    set polizzebylevelpotfstd;
    format potfmed 8.4;
    format totfreqsinmed 8.4;
run;

data polizzebylevelpotfstd;
    set polizzebylevelpotfstd;
    rename totfreqsinmed = totfreqsinmedstd;
    rename potfmed = potfmedstd;
run;

data polizzebylevelpotf;
    merge polizzebylevelpotf polizzebylevelpotfstd;
    by levelpotf;
run;

proc print data = polizzebylevelpotf;
run;

Obs,levelpotf,potfmed,totfreqsinmed,totespo,totnsin,totdannotot,totnpol,potfmedstd,totfreqsinmedstd
1,8-13,11.9762,0.09234,52458.09,4844,12530996.69,69752,-0.0078,-0.0072
2,14-15,14.5909,0.10893,24410.77,2659,8250561.58,33959,-0.0007,0.004
3,16,16.0,0.0997,6319.19,630,2016770.78,8812,0.0031,-0.0022
4,17-21,18.2424,0.11131,37795.14,4207,14159780.22,55913,0.0092,0.0057
5,22-23,22.8791,0.16838,1829.25,308,880999.1,2911,0.0217,0.0442
6,24-26,25.7794,0.10898,211.05,23,153340.75,350,0.0295,0.0041
7,27-28,27.5069,0.07383,40.63,3,32992.87,74,0.0342,-0.0197
8,29-30,29.5077,0.07867,114.4,9,31938.14,203,0.0396,-0.0164
9,31-,35.2581,0.07708,103.79,8,15619.88,187,0.0552,-0.0175


In [34]:
/* Effettuo il clustering usando sia potfmedstd che totfreqsinmedstd */
proc cluster method = ward data = polizzebylevelpotf outtree = clusterpotf;
    id levelpotf;
    var potfmedstd totfreqsinmedstd;
    freq totespo;
    copy totespo totnsin totdannotot potfmed totfreqsinmed;
run;

Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix
Unnamed: 0_level_1,Eigenvalue,Difference,Proportion,Cumulative
1,0.00011638,0.00010301,0.897,0.897
2,1.337e-05,,0.103,1.0

0,1
Root-Mean-Square Total-Sample Standard Deviation,0.008054

0,1
Root-Mean-Square Distance Between Observations,0.016109

Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History
Number of Clusters,Clusters Joined,Clusters Joined.1,Freq,Semipartial R-Square,R-Square,Tie
8,27-28,29-30,154,0.0001,1.0,
7,CL8,31-,257,0.0011,0.999,
6,24-26,CL7,468,0.0051,0.994,
5,14-15,16,30729,0.0168,0.977,
4,17-21,CL6,38263,0.0292,0.948,
3,CL5,CL4,68992,0.1028,0.845,
2,CL3,22-23,70821,0.2078,0.637,
1,8-13,CL2,123279,0.6371,0.0,


Con 6 cluster arrivo a $R^2 ≥ 0.99$

In [35]:
proc tree data = clusterpotf nclusters = 6 out = cluster6potf noprint;
    id levelpotf;
    copy totespo totnsin totdannotot potfmed totfreqsinmed;
run;

/* cluster6potf contiene tante righe quanti erano i cluster nel preraggruppamento manuale */
proc print data = cluster9eta;
run;

Obs,leveleta,totespo,totnsin,totdannotot,etamed,totfreqsinmed,CLUSTER,CLUSNAME,_FREQ_,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,61-64,5781.94,522,1467871.43,62,0.09028,1,CL11,2,11196.06,1018,2598196.02,0.091,2552.26,232.06
2,65-69,5414.12,496,1130324.59,67,0.09161,1,CL11,2,11196.06,1018,2598196.02,0.091,2552.26,232.06
3,70-81,4899.85,484,1486592.19,73,0.09878,2,CL10,2,5292.85,527,1582117.97,0.1,3002.12,298.92
4,82-,393.0,43,95525.78,86,0.10941,2,CL10,2,5292.85,527,1582117.97,0.1,3002.12,298.92
5,23-24,5290.49,753,2771965.77,24,0.14233,3,CL9,2,11098.25,1502,5230042.2,0.135,3482.05,471.25
6,25-26,5807.76,749,2458076.42,26,0.12897,3,CL9,2,11098.25,1502,5230042.2,0.135,3482.05,471.25
7,27-30,13053.9,1343,4436387.56,29,0.10288,4,27-30,1,13053.9,1343,4436387.56,0.103,3303.34,339.85
8,31-34,12992.03,1189,2940408.97,32,0.09152,5,31-34,1,12992.03,1189,2940408.97,0.092,2473.01,226.32
9,44-51,20356.78,2056,5287609.56,47,0.101,6,44-51,1,20356.78,2056,5287609.56,0.101,2571.79,259.75
10,52-60,17873.66,1868,5358383.58,56,0.10451,7,52-60,1,17873.66,1868,5358383.58,0.105,2868.51,299.79


In [36]:
/* polizzebyclusterpotf contiene una riga per ogni cluster e indica di ogni cluster una serie di informazioni */
proc means data = cluster6potf nway noprint;
    class cluster;
    var totespo totnsin totdannotot;
    output out = polizzebyclusterpotf sum = totespocl totnsincl totdannototcl;
run;

data polizzebyclusterpotf;
    set polizzebyclusterpotf;
    
    totfreqsincl = totnsincl / totespocl;
    if totnsincl > 0 then totdannomedcl = totdannototcl / totnsincl;
        else totdannomedcl = 0;
    totqdcl = totdannototcl / totespocl;
    
    drop _type_;
    
    format totfreqsincl 5.3;
    format totespocl 10.2;
    format totdannototcl 10.2;
    format totqdcl 10.2;
run;

proc print data = polizzebyclusterpotf;
run;

Obs,CLUSTER,_FREQ_,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,1,4,469.88,43,233891.63,0.092,5439.34,497.77
2,2,1,24410.77,2659,8250561.58,0.109,3102.88,337.99
3,3,1,6319.19,630,2016770.78,0.1,3201.22,319.15
4,4,1,37795.14,4207,14159780.2,0.111,3365.77,374.65
5,5,1,1829.25,308,880999.1,0.168,2860.39,481.62
6,6,1,52458.09,4844,12530996.7,0.092,2586.91,238.88


In [37]:
proc sort data = cluster6potf;
    by cluster;
run;

/* Arricchisco il dataset cluster9eta con le informazioni dei cluster a cui ogni gruppo appartiene appartengono */
data cluster6potf;
    merge cluster6potf polizzebyclusterpotf;
    by cluster;
run;

proc print data = cluster6potf;
run;

Obs,levelpotf,totespo,totnsin,totdannotot,potfmed,totfreqsinmed,CLUSTER,CLUSNAME,_FREQ_,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,27-28,40.63,3,32992.87,27.5069,0.07383,1,CL6,4,469.88,43,233891.63,0.092,5439.34,497.77
2,29-30,114.4,9,31938.14,29.5077,0.07867,1,CL6,4,469.88,43,233891.63,0.092,5439.34,497.77
3,31-,103.79,8,15619.88,35.2581,0.07708,1,CL6,4,469.88,43,233891.63,0.092,5439.34,497.77
4,24-26,211.05,23,153340.75,25.7794,0.10898,1,CL6,4,469.88,43,233891.63,0.092,5439.34,497.77
5,14-15,24410.77,2659,8250561.58,14.5909,0.10893,2,14-15,1,24410.77,2659,8250561.58,0.109,3102.88,337.99
6,16,6319.19,630,2016770.78,16.0,0.0997,3,16,1,6319.19,630,2016770.78,0.1,3201.22,319.15
7,17-21,37795.14,4207,14159780.22,18.2424,0.11131,4,17-21,1,37795.14,4207,14159780.2,0.111,3365.77,374.65
8,22-23,1829.25,308,880999.1,22.8791,0.16838,5,22-23,1,1829.25,308,880999.1,0.168,2860.39,481.62
9,8-13,52458.09,4844,12530996.69,11.9762,0.09234,6,8-13,1,52458.09,4844,12530996.7,0.092,2586.91,238.88


### `2.4 potkil`

In [38]:
proc means data = polizze nway noprint;
    class potkil;
    var espo nsin dannotot;
    output out = polizzebypotkil sum = totespo totnsin totdannotot;
run;

data polizzebypotkil;
    set polizzebypotkil;
    
    totfreqsin = totnsin / totespo;
    if totnsin > 0 then totdannomed = totdannotot / totnsin;
        else totdannomed = 0;
    totqd = totdannotot / totespo;
        
    drop _TYPE_;
    rename _FREQ_ = totnpol;
run;

proc print data = polizzebypotkil (obs = 10) noobs;
    var potkil totfreqsin totdannomed totqd totespo;
run;

Potkil,totfreqsin,totdannomed,totqd,totespo
17,0.0,0.0,0.0,1.0
18,0.07744,1962.64,151.979,761.92
21,0.07948,1004.69,79.857,50.32
22,0.06481,2123.15,137.611,1388.58
23,0.06825,2928.59,199.878,424.9
24,0.04795,1232.91,59.115,166.85
25,0.09376,2565.6,240.549,7231.26
26,0.05409,1834.78,99.243,110.93
27,0.14576,1629.22,237.483,54.88
28,0.60132,1617.55,972.671,1.66


In [39]:
proc sgplot data = polizzebypotkil;
    scatter x = potkil y = totfreqsin / markerattrs=(symbol=CircleFilled);
    series x = potkil y = totfreqsin;
run;

In [40]:
/* Preraggruppamento */
proc format;
    value formatpotkil
        low-26 = "17-26"
        27-33 = "27-33"
        34-39 = "34-39"
        40-49 = "40-49"
        50-57 = "50-57"
        58-65 = "58-65"
        66-78 = "66-78"
        79-92 = "79-92"
        93-108 = "93-108"
        109-123 = "109-123"
        124-139 = "124-139"
        140-143 = "140-143"
        144-150 = "144-150"
        151-high = "151-";
run;

data polizzebypotkil;
    set polizzebypotkil;
    levelpotkil = potkil;
    format levelpotkil formatpotkil.;
run;

/* Calcolo le informazioni a livello di ogni gruppo */
proc means data = polizzebypotkil nway noprint;
    class levelpotkil;
    var potkil totfreqsin;
    weight totespo;
    output out = polizzebylevelpotkil1 mean = potkilmed totfreqsinmed;
run;

proc means data = polizzebypotkil nway noprint;
    class levelpotkil;
    var totespo totnsin totdannotot totnpol;
    output out = polizzebylevelpotkil2 sum = totespo totnsin totdannotot totnpol;
run;

data polizzebylevelpotkil;
    merge polizzebylevelpotkil1 polizzebylevelpotkil2;
    by levelpotkil;
    drop _type_ _freq_;
run;

/* Standardizzo potkil e freqsin in modo da poterle usare assieme per l'accorpamento */
proc standard data = polizzebylevelpotkil out = polizzebylevelpotkilstd mean = 0 std = 1;
    weight totespo;
    var potkilmed totfreqsinmed;
run;

data polizzebylevelpotkilstd;
    set polizzebylevelpotkilstd;;
    format potkilmed 8.4;
    format totfreqsinmed 8.4;
run;

data polizzebylevelpotkilstd;
    set polizzebylevelpotkilstd;
    rename totfreqsinmed = totfreqsinmedstd;
    rename potkilmed = potkilmedstd;
run;

data polizzebylevelpotkil;
    merge polizzebylevelpotkil polizzebylevelpotkilstd;
    by levelpotkil;
run;

proc print data = polizzebylevelpotkil;
run;

Obs,levelpotkil,potkilmed,totfreqsinmed,totespo,totnsin,totdannotot,totnpol,potkilmedstd,totfreqsinmedstd
1,17-26,24,0.08623,10135.77,874,2156174.24,12869,-0.0122,-0.0182
2,27-33,32,0.09524,25850.67,2462,6016100.65,33836,-0.0085,-0.0084
3,34-39,37,0.09485,8866.64,841,2349060.15,11897,-0.0066,-0.0088
4,40-49,42,0.10566,21947.98,2319,6765891.74,30871,-0.0042,0.0029
5,50-57,54,0.10234,18468.22,1890,6129883.56,26263,0.0011,-0.0007
6,58-65,63,0.11136,8961.77,998,2819251.65,12396,0.0049,0.0091
7,66-78,70,0.11802,13294.86,1569,5232526.19,19805,0.0084,0.0164
8,79-92,84,0.1098,8551.79,939,3712429.07,12715,0.0146,0.0074
9,93-108,100,0.11632,4556.23,530,1947041.86,7146,0.0215,0.0145
10,109-123,113,0.10033,1335.57,134,394231.62,2114,0.0275,-0.0028


In [41]:
/* Effettuo il clustering usando sia potkilmedstd che totfreqsinmedstd */
proc cluster method = ward data = polizzebylevelpotkil outtree = clusterpotkil;
    id levelpotkil;
    var potkilmedstd totfreqsinmedstd;
    freq totespo;
    copy totespo totnsin totdannotot potkilmed totfreqsinmed;
run;

Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix
Unnamed: 0_level_1,Eigenvalue,Difference,Proportion,Cumulative
1,0.00017512,0.00013937,0.8305,0.8305
2,3.575e-05,,0.1695,1.0

0,1
Root-Mean-Square Total-Sample Standard Deviation,0.010268

0,1
Root-Mean-Square Distance Between Observations,0.020537

Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History
Number of Clusters,Clusters Joined,Clusters Joined.1,Freq,Semipartial R-Square,R-Square,Tie
13,140-143,144-150,266,0.0003,1.0,
12,27-33,34-39,34716,0.001,0.999,
11,109-123,124-139,1845,0.0011,0.998,
10,79-92,93-108,13107,0.0112,0.986,
9,58-65,66-78,22255,0.0132,0.973,
8,40-49,50-57,40415,0.0156,0.958,
7,CL11,151-,2381,0.0196,0.938,
6,CL10,CL13,13373,0.0269,0.911,
5,17-26,CL12,44851,0.0337,0.877,
4,CL9,CL6,35628,0.0378,0.84,


Con 11 cluster arrivo a $R^2 ≥ 0.99$

In [42]:
proc tree data = clusterpotkil nclusters = 11 out = cluster11potkil noprint;
    id levelpotkil;
    copy totespo totnsin totdannotot potkilmed totfreqsinmed;
run;

/* cluster11potkil contiene tante righe quanti erano i cluster nel preraggruppamento manuale */
proc print data = cluster11potkil;
run;

Obs,levelpotkil,totespo,totnsin,totdannotot,potkilmed,totfreqsinmed,CLUSTER,CLUSNAME
1,140-143,42.11,6,8233.18,141,0.14247,1,CL13
2,144-150,224.2,35,144582.22,147,0.15611,1,CL13
3,27-33,25850.67,2462,6016100.65,32,0.09524,2,CL12
4,34-39,8866.64,841,2349060.15,37,0.09485,2,CL12
5,109-123,1335.57,134,394231.62,113,0.10033,3,CL11
6,124-139,510.29,52,201130.36,133,0.1019,3,CL11
7,79-92,8551.79,939,3712429.07,84,0.1098,4,79-92
8,93-108,4556.23,530,1947041.86,100,0.11632,5,93-108
9,58-65,8961.77,998,2819251.65,63,0.11136,6,58-65
10,66-78,13294.86,1569,5232526.19,70,0.11802,7,66-78


In [43]:
/* polizzebyclusterpotkil contiene una riga per ogni cluster e indica di ogni cluster una serie di informazioni */
proc means data = cluster11potkil nway noprint;
    class cluster;
    var totespo totnsin totdannotot;
    output out = polizzebyclusterpotkil sum = totespocl totnsincl totdannototcl;
run;

data polizzebyclusterpotkil;
    set polizzebyclusterpotkil;
    
    totfreqsincl = totnsincl/totespocl;
    if totnsincl > 0 then totdannomedcl = totdannototcl / totnsincl;
        else totdannomedcl = 0;
    totqdcl = totdannototcl / totespocl;
    
    drop _type_;
    
    format totfreqsincl 5.3;
    format totespocl 10.2;
    format totdannototcl 10.2;
    format totqdcl 10.2;
run;

proc print data = polizzebyclusterpotkil;
run;

Obs,CLUSTER,_FREQ_,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,1,2,266.31,41,152815.4,0.154,3727.2,573.82
2,2,2,34717.32,3303,8365160.8,0.095,2532.59,240.95
3,3,2,1845.86,186,595361.98,0.101,3200.87,322.54
4,4,1,8551.79,939,3712429.07,0.11,3953.6,434.11
5,5,1,4556.23,530,1947041.86,0.116,3673.66,427.34
6,6,1,8961.77,998,2819251.65,0.111,2824.9,314.59
7,7,1,13294.86,1569,5232526.19,0.118,3334.94,393.58
8,8,1,21947.98,2319,6765891.74,0.106,2917.59,308.27
9,9,1,18468.22,1890,6129883.56,0.102,3243.32,331.92
10,10,1,536.23,42,196463.52,0.078,4677.7,366.38


In [44]:
proc sort data = cluster11potkil;
    by cluster;
run;

/* Arricchisco il dataset cluster11potkil con le informazioni dei cluster a cui ogni gruppo appartiene appartengono */
data cluster11potkil;
    merge cluster11potkil polizzebyclusterpotkil;
    by cluster;
run;

proc print data = cluster11potkil;
run;

Obs,levelpotkil,totespo,totnsin,totdannotot,potkilmed,totfreqsinmed,CLUSTER,CLUSNAME,_FREQ_,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,140-143,42.11,6,8233.18,141,0.14247,1,CL13,2,266.31,41,152815.4,0.154,3727.2,573.82
2,144-150,224.2,35,144582.22,147,0.15611,1,CL13,2,266.31,41,152815.4,0.154,3727.2,573.82
3,27-33,25850.67,2462,6016100.65,32,0.09524,2,CL12,2,34717.32,3303,8365160.8,0.095,2532.59,240.95
4,34-39,8866.64,841,2349060.15,37,0.09485,2,CL12,2,34717.32,3303,8365160.8,0.095,2532.59,240.95
5,109-123,1335.57,134,394231.62,113,0.10033,3,CL11,2,1845.86,186,595361.98,0.101,3200.87,322.54
6,124-139,510.29,52,201130.36,133,0.1019,3,CL11,2,1845.86,186,595361.98,0.101,3200.87,322.54
7,79-92,8551.79,939,3712429.07,84,0.1098,4,79-92,1,8551.79,939,3712429.07,0.11,3953.6,434.11
8,93-108,4556.23,530,1947041.86,100,0.11632,5,93-108,1,4556.23,530,1947041.86,0.116,3673.66,427.34
9,58-65,8961.77,998,2819251.65,63,0.11136,6,58-65,1,8961.77,998,2819251.65,0.111,2824.9,314.59
10,66-78,13294.86,1569,5232526.19,70,0.11802,7,66-78,1,13294.86,1569,5232526.19,0.118,3334.94,393.58


### `2.5 massa`

In [45]:
proc means data = polizze nway noprint;
    class massa;
    var espo nsin dannotot;
    output out = polizzebymassa sum = totespo totnsin totdannotot;
run;

data polizzebymassa;
    set polizzebymassa;

    totfreqsin = totnsin / totespo;
    if totnsin > 0 then totdannomed = totdannotot / totnsin;
        else totdannomed = 0;
    totqd = totdannotot / totespo;

    drop _TYPE_;
    rename _FREQ_ = totnpol;
run;

proc print data = polizzebymassa (obs = 10) noobs;
    var massa totfreqsin totdannomed totqd totespo;
run;

Massa,totfreqsin,totdannomed,totqd,totespo
555,0.0,0.0,0.0,3.293
595,0.08505,1004.69,85.449,47.031
600,0.0,0.0,0.0,1.0
620,0.07744,1962.64,151.979,761.919
640,0.1004,2535.3,254.545,597.609
645,0.09502,3869.34,367.668,210.48
650,0.06461,2292.56,148.115,386.957
655,0.08441,2658.69,224.424,485.715
660,0.09553,4501.34,430.005,83.745
665,0.16397,1850.27,303.397,12.197


In [46]:
proc sgplot data = polizzebymassa;
    scatter x = massa y = totfreqsin / markerattrs=(symbol=CircleFilled);
    series x = massa y = totfreqsin;
run;

In [47]:
/* Preraggruppamento */
proc format;
    value formatmassa
        low-691 = "555-691"
        692-720 = "692-720"
        721-800 = "721-800"
        801-905 = "801-905"
        906-980 = "906-980"
        981-1030 = "981-1030"
        1031-1094 = "1031-1094"
        1095-1269 = "1095-1269"
        1270-1379 = "1270-1379"
        1380-1424 = "1380-1424"
        1425-1520 = "1425-1520"
        1521-high = "1521-";
run;

data polizzebymassa;
    set polizzebymassa;
    levelmassa = massa;
    format levelmassa formatmassa.;
run;

/* Calcolo le informazioni a livello di ogni gruppo */
proc means data = polizzebymassa nway noprint;
    class levelmassa;
    var massa totfreqsin;
    weight totespo;
    output out = polizzebylevelmassa1 mean = massamed totfreqsinmed;
run;

proc means data = polizzebymassa nway noprint;
    class levelmassa;
    var totespo totnsin totdannotot totnpol;
    output out = polizzebylevelmassa2 sum = totespo totnsin totdannotot totnpol;
run;

data polizzebylevelmassa;
    merge polizzebylevelmassa1 polizzebylevelmassa2;
    by levelmassa;
    drop _type_ _freq_;
run;

/* Standardizzo massa e freqsin in modo da poterle usare assieme per l'accorpamento */
proc standard data = polizzebylevelmassa out = polizzebylevelmassastd mean = 0 std = 1;
    weight totespo;
    var massamed totfreqsinmed;
run;

data polizzebylevelmassastd;
    set polizzebylevelmassastd;
    rename totfreqsinmed = totfreqsinmedstd;
    rename massamed = massamedstd;
run;

data polizzebylevelmassa;
    merge polizzebylevelmassa polizzebylevelmassastd;
    by levelmassa;
run;

proc print data = polizzebylevelmassastd;
run;

Obs,levelmassa,massamedstd,totfreqsinmedstd,totespo,totnsin,totdannotot,totnpol
1,555-691,0,-0.020622,5521.44,451,1061529.72,7300
2,692-720,0,-0.012126,16065.77,1453,3658464.65,20597
3,721-800,0,-0.004167,26549.5,2619,7429810.95,35149
4,801-905,0,0.006782,19165.92,2107,5922241.99,27196
5,906-980,0,0.011072,12906.82,1476,5335631.45,18596
6,981-1030,0,-0.001131,11711.9,1192,3311423.85,16285
7,1031-1094,0,-0.001358,7287.62,740,1931281.76,10485
8,1095-1269,0,0.004669,16323.8,1759,6695084.72,24090
9,1270-1379,0,0.008103,4483.5,499,1539118.74,7006
10,1380-1424,0,-0.014131,1052.35,93,327804.52,1706


In [48]:
/* Effettuo il clustering usando sia massamedstd che totfreqsinmedstd */
proc cluster method = ward data = polizzebylevelmassa outtree = clustermassa;
    id levelmassa;
    var massamedstd totfreqsinmedstd;
    freq totespo;
    copy totespo totnsin totdannotot massamed totfreqsinmed;
run;

Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix,Eigenvalues of the Covariance Matrix
Unnamed: 0_level_1,Eigenvalue,Difference,Proportion,Cumulative
1,0.00014846,0.00011849,0.832,0.832
2,2.998e-05,,0.168,1.0

0,1
Root-Mean-Square Total-Sample Standard Deviation,0.009446

0,1
Root-Mean-Square Distance Between Observations,0.018891

Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History,Cluster History
Number of Clusters,Clusters Joined,Clusters Joined.1,Freq,Semipartial R-Square,R-Square,Tie
11,981-1030,1031-1094,18998,0.0016,0.998,
10,1095-1269,1270-1379,20806,0.0083,0.99,
9,1425-1520,1521-,2213,0.0108,0.979,
8,801-905,906-980,32071,0.0114,0.968,
7,555-691,692-720,21586,0.0143,0.954,
6,CL10,1380-1424,21858,0.0217,0.932,
5,CL11,CL6,40856,0.049,0.883,
4,CL7,721-800,48135,0.0627,0.82,
3,CL5,CL9,43069,0.1295,0.691,
2,CL8,CL3,75140,0.1453,0.545,


Con 10 cluster arrivo a $R^2 ≥ 0.99$

In [49]:
proc tree data = clustermassa nclusters = 10 out = cluster10massa noprint;
    id levelmassa;
    copy totespo totnsin totdannotot massamed totfreqsinmed;
run;

/* cluster10massa contiene tante righe quanti erano i cluster nel preraggruppamento manuale */
proc print data = cluster10massa;
run;

Obs,levelmassa,totespo,totnsin,totdannotot,massamed,totfreqsinmed,CLUSTER,CLUSNAME
1,981-1030,11711.9,1192,3311423.85,1007,0.10178,1,CL11
2,1031-1094,7287.62,740,1931281.76,1065,0.10154,1,CL11
3,1095-1269,16323.8,1759,6695084.72,1171,0.10776,2,CL10
4,1270-1379,4483.5,499,1539118.74,1303,0.1113,2,CL10
5,1425-1520,1499.2,196,581444.16,1459,0.13074,3,1425-1520
6,1521-,714.5,106,279163.49,1757,0.14835,4,1521-
7,801-905,19165.92,2107,5922241.99,860,0.10993,5,801-905
8,906-980,12906.82,1476,5335631.45,939,0.11436,6,906-980
9,555-691,5521.44,451,1061529.72,663,0.08168,7,555-691
10,692-720,16065.77,1453,3658464.65,706,0.09044,8,692-720


In [50]:
/* polizzebyclustereta contiene una riga per ogni cluster e indica di ogni cluster una serie di informazioni */
proc means data = cluster10massa nway noprint;
    class cluster;
    var totespo totnsin totdannotot;
    output out = polizzebyclustermassa sum = totespocl totnsincl totdannototcl;
run;

data polizzebyclustermassa;
    set polizzebyclustermassa;
    
    totfreqsincl = totnsincl / totespocl;
    if totnsincl > 0 then totdannomedcl = totdannototcl / totnsincl;
        else totdannomedcl = 0;
    totqdcl = totdannototcl / totespocl;   
        
    drop _type_;
    
    format totfreqsincl 5.3;
    format totespocl 10.2;
    format totdannototcl 10.2;
    format totqdcl 10.2;
run;

proc print data = polizzebyclustermassa;
run;

Obs,CLUSTER,_FREQ_,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,1,2,18999.52,1932,5242705.61,0.102,2713.62,275.94
2,2,2,20807.3,2258,8234203.46,0.109,3646.68,395.74
3,3,1,1499.2,196,581444.16,0.131,2966.55,387.84
4,4,1,714.5,106,279163.49,0.148,2633.62,390.71
5,5,1,19165.92,2107,5922241.99,0.11,2810.75,309.0
6,6,1,12906.82,1476,5335631.45,0.114,3614.93,413.4
7,7,1,5521.44,451,1061529.72,0.082,2353.72,192.26
8,8,1,16065.77,1453,3658464.65,0.09,2517.87,227.72
9,9,1,1052.35,93,327804.52,0.088,3524.78,311.5
10,10,1,26549.5,2619,7429810.95,0.099,2836.89,279.85


In [51]:
proc sort data = cluster10massa;
    by cluster;
run;

/* Arricchisco il dataset cluster9eta con le informazioni dei cluster a cui ogni gruppo appartiene appartengono */
data cluster10massa;
    merge cluster10massa polizzebyclustermassa;
    by cluster;
run;

proc print data = cluster10massa;
run;

Obs,levelmassa,totespo,totnsin,totdannotot,massamed,totfreqsinmed,CLUSTER,CLUSNAME,_FREQ_,totespocl,totnsincl,totdannototcl,totfreqsincl,totdannomedcl,totqdcl
1,981-1030,11711.9,1192,3311423.85,1007,0.10178,1,CL11,2,18999.52,1932,5242705.61,0.102,2713.62,275.94
2,1031-1094,7287.62,740,1931281.76,1065,0.10154,1,CL11,2,18999.52,1932,5242705.61,0.102,2713.62,275.94
3,1095-1269,16323.8,1759,6695084.72,1171,0.10776,2,CL10,2,20807.3,2258,8234203.46,0.109,3646.68,395.74
4,1270-1379,4483.5,499,1539118.74,1303,0.1113,2,CL10,2,20807.3,2258,8234203.46,0.109,3646.68,395.74
5,1425-1520,1499.2,196,581444.16,1459,0.13074,3,1425-1520,1,1499.2,196,581444.16,0.131,2966.55,387.84
6,1521-,714.5,106,279163.49,1757,0.14835,4,1521-,1,714.5,106,279163.49,0.148,2633.62,390.71
7,801-905,19165.92,2107,5922241.99,860,0.10993,5,801-905,1,19165.92,2107,5922241.99,0.11,2810.75,309.0
8,906-980,12906.82,1476,5335631.45,939,0.11436,6,906-980,1,12906.82,1476,5335631.45,0.114,3614.93,413.4
9,555-691,5521.44,451,1061529.72,663,0.08168,7,555-691,1,5521.44,451,1061529.72,0.082,2353.72,192.26
10,692-720,16065.77,1453,3658464.65,706,0.09044,8,692-720,1,16065.77,1453,3658464.65,0.09,2517.87,227.72


### Assegnazione formati

In [52]:
/* prov */
proc format;
    value $classprov
        "RC","VT","CN","OR","RA","VC","FE","LC","SS","VV","BZ","PV","RG","CH","PG","AR","TE","TN","VR" = "prov1"
        "BN","PO","IM","BA" = "prov2"
        "EN","NO","SV","VI","MC","MT","RE","BS","RI","AG","AV","PC" = "prov3"
        "AL","SO","GO","LT","PR","PN","GR","UD","TV","PD" = "prov4"
        "SI","TR","AP","LO","FG","ME","LU","CZ","FR","CT","VE","BG","IS","MO","TP" = "prov5"
        "CL","PI","VA","BR","FO","LI","SR","CO","SA" = "prov6"
        "LE","RN","AN","MS","BO","GE","TS" = "prov7"
        "CB","FI","NU","PE","TO","MI" = "prov8"
        "PT","TA","SP","CA","ROMA","PA" = "prov9"
        "BL","PZ","BI","CR","CS","AT","MN","PS" = "prov10"
        "AO","NA","CE","KR" = "prov11"
        "AQ","RSM","VB","RO" = "prov12";
run;

/* eta */
proc format;
    value classeta
        low-22 = "18-22"
        23-26 = "23-26"
        27-30 = "27-30"
        31-34 = "31-34"
        35-43 = "35-43"
        44-51 = "44-51"
        52-60 = "52-60"
        61-69 = "61-69"
        70-high = "70-";
run;

/* potf */
proc format;
    value classpotf
        low-13 = "8-13"
        14-15 = "14-15"
        16 = "16"
        17-21 = "17-21"
        22-23 = "22-23"
        24-26 = "24-26"
        27-28 = "27-28"
        29-30 = "29-30"
        31-high = "31-";
run;

/* potkil */
proc format;
    value classpotkil
        low-26 = "17-26"
        27-33 = "27-33"
        34-39 = "34-39"
        40-49 = "40-49"
        50-57 = "50-57"
        58-65 = "58-65"
        66-78 = "66-78"
        79-92 = "79-92"
        93-108 = "93-108"
        109-123 = "109-123"
        124-139 = "124-139"
        140-143 = "140-143"
        144-150 = "144-150"
        151-high = "151-";
run;

/* massa */
proc format;
    value classmassa
        low-691 = "555-691"
        692-720 = "692-720"
        721-800 = "721-800"
        801-905 = "801-905"
        906-980 = "906-980"
        981-1030 = "981-1030"
        1031-1094 = "1031-1094"
        1095-1269 = "1095-1269"
        1270-1379 = "1270-1379"
        1380-1424 = "1380-1424"
        1425-1520 = "1425-1520"
        1521-high = "1521-";
run;

/* Assegno i formati */
data polizze;
    set polizze;
    
    format prov $classprov.;
    format eta classeta.;
    format potf classpotf.;
    format potkil classpotkil.;
    format massa classmassa.;
run;

proc print data = polizze (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,espo,nsin,dannotot,freqsin
1,F,35-43,prov8,NO,B,17-26,555-691,8-13,1.0,0,0.0,0
2,M,35-43,prov5,NO,B,34-39,721-800,8-13,1.0,0,0.0,0
3,M,44-51,prov7,NO,B,58-65,801-905,14-15,1.0,0,0.0,0
4,F,18-22,prov10,NO,B,34-39,692-720,8-13,1.0,0,0.0,0
5,M,44-51,prov9,NO,B,27-33,721-800,8-13,0.2,0,0.0,0
6,F,44-51,prov8,NO,B,40-49,721-800,14-15,1.0,0,0.0,0
7,M,35-43,prov1,NO,D,50-57,1031-1094,17-21,1.0,0,0.0,0
8,M,70-,prov1,SI,B,58-65,906-980,14-15,1.0,0,0.0,0
9,F,44-51,prov1,NO,B,58-65,801-905,14-15,1.0,0,0.0,0
10,M,52-60,prov1,NO,B,79-92,1095-1269,17-21,1.0,1,359.057,1


### Frequenza sinistri nei cluster

In [53]:
/* prov */
proc sgplot data=polizze;
    vline prov / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05;
run;

In [54]:
/* eta */
proc sgplot data=polizze;
    vline eta / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05;
run;

In [55]:
/* potf */
proc sgplot data=polizze;
    vline potf / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05;
run;

In [56]:
/* potkil */
proc sgplot data=polizze;
    vline potkil / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05;
run;

In [57]:
/* massa */
proc sgplot data=polizze;
    vline massa / response=freqsin weight=espo stat=mean limitstat=clm alpha = .05;
run;

## 3) Tariffazione con i GLM

### 3.A.1 Modelli per il numero di sinistri

#### Preparazione dei dati

In [58]:
/* Dataset per modelli con dati individuali */
data polizze;
    set polizze;
    lnespo = log(espo);
    freqsin = nsin / espo;
run;


/* Dataset per modelli con dati raggruppati */
proc means data = polizze nway noprint;
    class sesso capoluogo bendie prov eta potf potkil massa;
    var espo nsin dannotot;
    output out = polizzecum sum = espocum nsincum dannototcum;
run;

data polizzecum;
    set polizzecum;
    lnespocum = log(espocum);
    freqsincum = nsincum / espocum;
run;

proc print data = polizzecum (obs = 10);
run;

Obs,Sesso,Capoluogo,Bendie,Prov,Eta,potf,Potkil,Massa,_TYPE_,_FREQ_,espocum,nsincum,dannototcum,lnespocum,freqsincum
1,F,NO,B,prov3,18-22,8-13,17-26,555-691,255,3,1.688,0,0.0,0.52354,0.0
2,F,NO,B,prov3,18-22,8-13,17-26,692-720,255,23,18.997,1,2019.7,2.94428,0.05264
3,F,NO,B,prov3,18-22,8-13,27-33,555-691,255,4,3.392,0,0.0,1.22142,0.0
4,F,NO,B,prov3,18-22,8-13,27-33,692-720,255,30,23.411,0,0.0,3.15321,0.0
5,F,NO,B,prov3,18-22,8-13,27-33,721-800,255,64,46.926,4,3753.05,3.84857,0.085241
6,F,NO,B,prov3,18-22,8-13,27-33,906-980,255,1,0.923,0,0.0,-0.08013,0.0
7,F,NO,B,prov3,18-22,8-13,34-39,692-720,255,7,5.671,0,0.0,1.73537,0.0
8,F,NO,B,prov3,18-22,8-13,34-39,721-800,255,48,37.709,3,4314.97,3.6299,0.079557
9,F,NO,B,prov3,18-22,8-13,34-39,801-905,255,4,3.674,0,0.0,1.30128,0.0
10,F,NO,B,prov3,18-22,8-13,40-49,721-800,255,19,14.098,1,394.96,2.64603,0.070932


In [59]:
/* Il dataset polizzecum ha 26 249 righe */
proc summary data = polizzecum;
    output out = conta_righe;
run;

proc print data = conta_righe;
run;

Obs,_TYPE_,_FREQ_
1,0,26249


#### 3.A.1.1 Modello di Poisson

In [60]:
/* Dati individuali, eta */
proc genmod data = polizze;
    class eta(ref='35-43');
    model nsin = eta /
        dist = poisson
        offset = lnespo
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZE
Distribution,Poisson
Link Function,Log
Dependent Variable,nsin
Offset Variable,lnespo

0,1
Number of Observations Read,172161
Number of Observations Used,172161

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,9,18-22 23-26 27-30 31-34 44-51 52-60 61-69 70- 35-43

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,170000.0,66656.7461,0.3872
Scaled Deviance,170000.0,66656.7461,0.3872
Pearson Chi-Square,170000.0,211077.4956,1.2261
Scaled Pearson X2,170000.0,211077.4956,1.2261
Log Likelihood,,-44647.2568,
Full Log Likelihood,,-45351.7365,
AIC (smaller is better),,90721.473,
AICC (smaller is better),,90721.474,
BIC (smaller is better),,90811.9787,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-2.4884,0.0218,-2.5311,-2.4457,13046.5,<.0001
Eta,18-22,1,0.7667,0.0374,0.6934,0.84,419.96,<.0001
Eta,23-26,1,0.4884,0.0338,0.4222,0.5546,209.15,<.0001
Eta,27-30,1,0.2142,0.0349,0.1458,0.2826,37.63,<.0001
Eta,31-34,1,0.0971,0.0363,0.0261,0.1682,7.17,0.0074
Eta,44-51,1,0.1957,0.031,0.135,0.2565,39.86,<.0001
Eta,52-60,1,0.2299,0.0318,0.1676,0.2922,52.34,<.0001
Eta,61-69,1,0.0906,0.0382,0.0158,0.1655,5.64,0.0176
Eta,70-,1,0.1815,0.0487,0.086,0.2769,13.88,0.0002
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,8,518.85,<.0001


In [61]:
/* Dati raggruppati, eta */
proc genmod data = polizzecum;
    class eta(ref='35-43');
    model nsincum = eta /
        dist = poisson
        offset = lnespocum
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,9,18-22 23-26 27-30 31-34 44-51 52-60 61-69 70- 35-43

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,20367.2147,0.7762
Scaled Deviance,26000.0,20367.2147,0.7762
Pearson Chi-Square,26000.0,39689.2852,1.5125
Scaled Pearson X2,26000.0,39689.2852,1.5125
Log Likelihood,,-11513.4125,
Full Log Likelihood,,-18243.0724,
AIC (smaller is better),,36504.1449,
AICC (smaller is better),,36504.1517,
BIC (smaller is better),,36577.7233,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-2.4884,0.0218,-2.5311,-2.4457,13046.5,<.0001
Eta,18-22,1,0.7667,0.0374,0.6934,0.84,419.96,<.0001
Eta,23-26,1,0.4884,0.0338,0.4222,0.5546,209.15,<.0001
Eta,27-30,1,0.2142,0.0349,0.1458,0.2826,37.63,<.0001
Eta,31-34,1,0.0971,0.0363,0.0261,0.1682,7.17,0.0074
Eta,44-51,1,0.1957,0.031,0.135,0.2565,39.86,<.0001
Eta,52-60,1,0.2299,0.0318,0.1676,0.2922,52.34,<.0001
Eta,61-69,1,0.0906,0.0382,0.0158,0.1655,5.64,0.0176
Eta,70-,1,0.1815,0.0487,0.086,0.2769,13.88,0.0002
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,8,518.85,<.0001


In [62]:
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil(ref='66-78');
    model nsincum = eta potkil /
        dist = poisson
        offset = lnespocum
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,9,18-22 23-26 27-30 31-34 44-51 52-60 61-69 70- 35-43
Potkil,14,109-123 124-139 140-143 144-150 151- 17-26 27-33 34-39 40-49 50-57 58-65 79-92 93-108 66-78

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,20231.0225,0.7714
Scaled Deviance,26000.0,20231.0225,0.7714
Pearson Chi-Square,26000.0,38161.884,1.4551
Scaled Pearson X2,26000.0,38161.884,1.4551
Log Likelihood,,-11445.3164,
Full Log Likelihood,,-18174.9764,
AIC (smaller is better),,36393.9527,
AICC (smaller is better),,36393.9913,
BIC (smaller is better),,36573.8111,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-2.3308,0.0317,-2.3929,-2.2687,5410.09,<.0001
Eta,18-22,1,0.7993,0.0376,0.7256,0.8731,451.02,<.0001
Eta,23-26,1,0.5031,0.0338,0.4368,0.5694,221.04,<.0001
Eta,27-30,1,0.22,0.0349,0.1515,0.2885,39.66,<.0001
Eta,31-34,1,0.098,0.0363,0.0269,0.1691,7.30,0.0069
Eta,44-51,1,0.1947,0.031,0.1339,0.2555,39.42,<.0001
Eta,52-60,1,0.2362,0.0318,0.1739,0.2985,55.16,<.0001
Eta,61-69,1,0.1113,0.0382,0.0363,0.1862,8.47,0.0036
Eta,70-,1,0.2174,0.0488,0.1217,0.3131,19.82,<.0001
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,8,548.35,<.0001
Potkil,13,136.19,<.0001


In [63]:
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil(ref='66-78') massa(ref='906-980');
    model nsincum = eta potkil massa /
        dist = poisson
        offset = lnespocum
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,9,18-22 23-26 27-30 31-34 44-51 52-60 61-69 70- 35-43
Potkil,14,109-123 124-139 140-143 144-150 151- 17-26 27-33 34-39 40-49 50-57 58-65 79-92 93-108 66-78
Massa,12,1031-1094 1095-1269 1270-1379 1380-1424 1425-1520 1521- 555-691 692-720 721-800 801-905 981-1030 906-980

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,20178.0174,0.7697
Scaled Deviance,26000.0,20178.0174,0.7697
Pearson Chi-Square,26000.0,37643.0704,1.4359
Scaled Pearson X2,26000.0,37643.0704,1.4359
Log Likelihood,,-11418.8138,
Full Log Likelihood,,-18148.4738,
AIC (smaller is better),,36362.9476,
AICC (smaller is better),,36363.0332,
BIC (smaller is better),,36632.7352,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-2.2876,0.043,-2.3718,-2.2033,2832.47,<.0001
Eta,18-22,1,0.7991,0.038,0.7247,0.8735,443.04,<.0001
Eta,23-26,1,0.5046,0.0341,0.4379,0.5714,219.49,<.0001
Eta,27-30,1,0.2227,0.035,0.1541,0.2913,40.47,<.0001
Eta,31-34,1,0.101,0.0363,0.0299,0.1722,7.75,0.0054
Eta,44-51,1,0.193,0.031,0.1322,0.2538,38.71,<.0001
Eta,52-60,1,0.2356,0.0318,0.1732,0.298,54.80,<.0001
Eta,61-69,1,0.1111,0.0383,0.0361,0.1861,8.43,0.0037
Eta,70-,1,0.2193,0.0489,0.1235,0.3151,20.13,<.0001
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,8,537.11,<.0001
Potkil,13,51.08,<.0001
Massa,11,53.01,<.0001


In [64]:
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil(ref='66-78') massa(ref='906-980') potf;
    model nsincum = eta potkil massa potf/
        dist = poisson
        offset = lnespocum
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,9,18-22 23-26 27-30 31-34 44-51 52-60 61-69 70- 35-43
Potkil,14,109-123 124-139 140-143 144-150 151- 17-26 27-33 34-39 40-49 50-57 58-65 79-92 93-108 66-78
Massa,12,1031-1094 1095-1269 1270-1379 1380-1424 1425-1520 1521- 555-691 692-720 721-800 801-905 981-1030 906-980
potf,9,14-15 16 17-21 22-23 24-26 27-28 29-30 31- 8-13

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,20079.5225,0.7662
Scaled Deviance,26000.0,20079.5225,0.7662
Pearson Chi-Square,26000.0,36510.7622,1.3931
Scaled Pearson X2,26000.0,36510.7622,1.3931
Log Likelihood,,-11369.5664,
Full Log Likelihood,,-18099.2263,
AIC (smaller is better),,36280.4526,
AICC (smaller is better),,36280.5841,
BIC (smaller is better),,36615.6433,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-2.5882,0.0602,-2.7061,-2.4703,1851.24,<.0001
Eta,18-22,1,0.7942,0.038,0.7198,0.8686,437.50,<.0001
Eta,23-26,1,0.4959,0.0341,0.4291,0.5627,211.78,<.0001
Eta,27-30,1,0.215,0.035,0.1464,0.2836,37.72,<.0001
Eta,31-34,1,0.0979,0.0363,0.0267,0.169,7.27,0.0070
Eta,44-51,1,0.1919,0.031,0.1311,0.2527,38.26,<.0001
Eta,52-60,1,0.2371,0.0318,0.1747,0.2995,55.49,<.0001
Eta,61-69,1,0.1149,0.0383,0.0399,0.1899,9.01,0.0027
Eta,70-,1,0.2256,0.0489,0.1298,0.3215,21.30,<.0001
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,8,526.37,<.0001
Potkil,13,25.04,0.0228
Massa,11,51.68,<.0001
potf,8,98.49,<.0001


In [65]:
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil(ref='66-78') massa(ref='906-980') bendie;
    model nsincum = eta potkil massa bendie /
        dist = poisson
        offset = lnespocum
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,9,18-22 23-26 27-30 31-34 44-51 52-60 61-69 70- 35-43
Potkil,14,109-123 124-139 140-143 144-150 151- 17-26 27-33 34-39 40-49 50-57 58-65 79-92 93-108 66-78
Massa,12,1031-1094 1095-1269 1270-1379 1380-1424 1425-1520 1521- 555-691 692-720 721-800 801-905 981-1030 906-980
Bendie,2,B D

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,19964.4087,0.7616
Scaled Deviance,26000.0,19964.4087,0.7616
Pearson Chi-Square,26000.0,35130.4937,1.3401
Scaled Pearson X2,26000.0,35130.4937,1.3401
Log Likelihood,,-11312.0095,
Full Log Likelihood,,-18041.6694,
AIC (smaller is better),,36151.3389,
AICC (smaller is better),,36151.4296,
BIC (smaller is better),,36429.3019,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.8284,0.0522,-1.9308,-1.726,1224.67,<.0001
Eta,18-22,1,0.7887,0.038,0.7143,0.8631,431.53,<.0001
Eta,23-26,1,0.4885,0.0341,0.4217,0.5553,205.50,<.0001
Eta,27-30,1,0.2112,0.035,0.1426,0.2798,36.38,<.0001
Eta,31-34,1,0.0929,0.0363,0.0217,0.164,6.55,0.0105
Eta,44-51,1,0.1911,0.031,0.1303,0.2519,37.97,<.0001
Eta,52-60,1,0.2374,0.0318,0.175,0.2997,55.64,<.0001
Eta,61-69,1,0.1144,0.0383,0.0394,0.1894,8.94,0.0028
Eta,70-,1,0.2273,0.0489,0.1316,0.3231,21.64,<.0001
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,8,518.15,<.0001
Potkil,13,78.41,<.0001
Massa,11,48.61,<.0001
Bendie,1,213.61,<.0001


In [66]:
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil(ref='66-78') massa(ref='906-980') bendie sesso;
    model nsincum = eta potkil massa bendie sesso /
        dist = poisson
        offset = lnespocum
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,9,18-22 23-26 27-30 31-34 44-51 52-60 61-69 70- 35-43
Potkil,14,109-123 124-139 140-143 144-150 151- 17-26 27-33 34-39 40-49 50-57 58-65 79-92 93-108 66-78
Massa,12,1031-1094 1095-1269 1270-1379 1380-1424 1425-1520 1521- 555-691 692-720 721-800 801-905 981-1030 906-980
Bendie,2,B D
Sesso,2,F M

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,19964.3907,0.7616
Scaled Deviance,26000.0,19964.3907,0.7616
Pearson Chi-Square,26000.0,35140.5681,1.3405
Scaled Pearson X2,26000.0,35140.5681,1.3405
Log Likelihood,,-11312.0005,
Full Log Likelihood,,-18041.6604,
AIC (smaller is better),,36153.3209,
AICC (smaller is better),,36153.417,
BIC (smaller is better),,36439.4593,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.8281,0.0523,-1.9306,-1.7256,1221.91,<.0001
Eta,18-22,1,0.7886,0.038,0.7141,0.863,431.18,<.0001
Eta,23-26,1,0.4885,0.0341,0.4217,0.5552,205.48,<.0001
Eta,27-30,1,0.2112,0.035,0.1426,0.2798,36.39,<.0001
Eta,31-34,1,0.0929,0.0363,0.0218,0.164,6.55,0.0105
Eta,44-51,1,0.191,0.031,0.1302,0.2518,37.90,<.0001
Eta,52-60,1,0.237,0.0319,0.1745,0.2996,55.18,<.0001
Eta,61-69,1,0.1138,0.0385,0.0384,0.1893,8.75,0.0031
Eta,70-,1,0.2266,0.0492,0.1302,0.323,21.21,<.0001
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,8,518.13,<.0001
Potkil,13,77.83,<.0001
Massa,11,48.62,<.0001
Bendie,1,212.73,<.0001
Sesso,1,0.02,0.8933


In [67]:
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil(ref='66-78') massa(ref='906-980') bendie capoluogo;
    model nsincum = eta potkil massa bendie capoluogo /
        dist = poisson
        offset = lnespocum
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,9,18-22 23-26 27-30 31-34 44-51 52-60 61-69 70- 35-43
Potkil,14,109-123 124-139 140-143 144-150 151- 17-26 27-33 34-39 40-49 50-57 58-65 79-92 93-108 66-78
Massa,12,1031-1094 1095-1269 1270-1379 1380-1424 1425-1520 1521- 555-691 692-720 721-800 801-905 981-1030 906-980
Bendie,2,B D
Capoluogo,2,NO SI

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,19721.4813,0.7523
Scaled Deviance,26000.0,19721.4813,0.7523
Pearson Chi-Square,26000.0,32393.3269,1.2357
Scaled Pearson X2,26000.0,32393.3269,1.2357
Log Likelihood,,-11190.5458,
Full Log Likelihood,,-17920.2058,
AIC (smaller is better),,35910.4115,
AICC (smaller is better),,35910.5076,
BIC (smaller is better),,36196.5499,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.5455,0.055,-1.6533,-1.4378,790.46,<.0001
Eta,18-22,1,0.8086,0.038,0.7342,0.8831,453.17,<.0001
Eta,23-26,1,0.4985,0.0341,0.4317,0.5653,213.96,<.0001
Eta,27-30,1,0.2148,0.035,0.1462,0.2834,37.64,<.0001
Eta,31-34,1,0.0922,0.0363,0.0211,0.1634,6.46,0.0110
Eta,44-51,1,0.1864,0.031,0.1256,0.2472,36.10,<.0001
Eta,52-60,1,0.2254,0.0318,0.163,0.2878,50.14,<.0001
Eta,61-69,1,0.0937,0.0383,0.0186,0.1687,5.98,0.0144
Eta,70-,1,0.1927,0.0489,0.0968,0.2886,15.52,<.0001
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,8,548.11,<.0001
Potkil,13,72.46,<.0001
Massa,11,48.64,<.0001
Bendie,1,228.22,<.0001
Capoluogo,1,242.93,<.0001


In [68]:
/* Accorpamento classi */

/* eta */
/* Accetti H0 - > Sì accorpamenti */
ods select contrasts;
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil(ref='66-78') massa(ref='906-980') bendie capoluogo prov(ref='prov1');
    model nsincum = eta potkil massa bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        type3;
    contrast "eta 61-" eta 0  0  0  0  0  0  1 -1  0;
run;

Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results
Contrast,DF,Chi-Square,Pr > ChiSq,Type
eta 61-,1,3.67,0.0554,LR


In [69]:
/* potkil */
/* Accetto H0 -> Sì accorpamenti */
ods select contrasts;
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil(ref='66-78') massa(ref='906-980') bendie capoluogo prov(ref='prov1');
    model nsincum = eta potkil massa bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        type3;
    contrast "potkil" potkil 1 -1  0  0  0  0  0  0  0  0  0  0  0  0,
                      potkil 0  1 -1  0  0  0  0  0  0  0  0  0  0  0,
                      potkil 0  0  1 -1  0  0  0  0  0  0  0  0  0  0,
                      potkil 0  0  0  1 -1  0  0  0  0  0  0  0  0  0;
run;

Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results
Contrast,DF,Chi-Square,Pr > ChiSq,Type
potkil,4,10.38,0.0345,LR


In [70]:
/* massa */
/* Accetto H0 -> Sì accorpamenti */
ods select contrasts;
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil(ref='66-78') massa(ref='906-980') bendie capoluogo prov(ref='prov1');
    model nsincum = eta potkil massa bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        type3;
    contrast "massa" massa  0  0  0  0  0  0  1 -1  0  0  0  0,
                     massa  0  0  0  0  0  0  0  1 -1  0  0  0,
                     massa  0  0  0  0  0  0  0  0  1 -1  0  0,
                     massa  0  0  0  0  0  0  0  0  0  0  1 -1;
run;

Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results
Contrast,DF,Chi-Square,Pr > ChiSq,Type
massa,4,10.34,0.035,LR


In [71]:
/* Faccio gli accorpamenti */

/* eta */
proc format;
    value classeta
        low-22 = "18-22"
        23-26 = "23-26"
        27-30 = "27-30"
        31-34 = "31-34"
        35-43 = "35-43"
        44-51 = "44-51"
        52-60 = "52-60"
        61-high = "61-";
run;

/* potkil */
proc format;
    value classpotkil
        low-57 = "-57"
        58-high = "58-";
run;

/* massa */
proc format;
    value classmassa
        low-1030 = "-1030"
        1031-1379 = "1031-1379"
        1380-1424 = "1380-1424"
        1425-high = "1425-";
run;

/* Il format si aggiorna in automatico nel dataset */
proc print data = polizze (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,espo,nsin,dannotot,freqsin,lnespo
1,F,35-43,prov8,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
2,M,35-43,prov5,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
3,M,44-51,prov7,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
4,F,18-22,prov10,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
5,M,44-51,prov9,NO,B,-57,-1030,8-13,0.2,0,0.0,0,-1.60944
6,F,44-51,prov8,NO,B,-57,-1030,14-15,1.0,0,0.0,0,0.0
7,M,35-43,prov1,NO,D,-57,1031-1379,17-21,1.0,0,0.0,0,0.0
8,M,61-,prov1,SI,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
9,F,44-51,prov1,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
10,M,52-60,prov1,NO,B,58-,1031-1379,17-21,1.0,1,359.057,1,0.0


In [72]:
/* modello con livelli accorpati */
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil massa bendie capoluogo prov(ref='prov1');
    model nsincum = eta potkil massa bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        type3; 
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,8,18-22 23-26 27-30 31-34 44-51 52-60 61- 35-43
Potkil,2,-57 58-
Massa,4,-1030 1031-1379 1380-1424 1425-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,12,prov10 prov11 prov12 prov2 prov3 prov4 prov5 prov6 prov7 prov8 prov9 prov1

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,19374.9753,0.7388
Scaled Deviance,26000.0,19374.9753,0.7388
Pearson Chi-Square,26000.0,31273.9757,1.1926
Scaled Pearson X2,26000.0,31273.9757,1.1926
Log Likelihood,,-11017.2928,
Full Log Likelihood,,-17746.9527,
AIC (smaller is better),,35543.9055,
AICC (smaller is better),,35543.955,
BIC (smaller is better),,35748.29,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.7699,0.0677,-1.9027,-1.6372,682.67,<.0001
Eta,18-22,1,0.84,0.0378,0.766,0.914,494.78,<.0001
Eta,23-26,1,0.5168,0.0339,0.4503,0.5833,232.00,<.0001
Eta,27-30,1,0.2237,0.035,0.1551,0.2922,40.92,<.0001
Eta,31-34,1,0.096,0.0363,0.0249,0.1672,7.01,0.0081
Eta,44-51,1,0.1825,0.031,0.1217,0.2433,34.64,<.0001
Eta,52-60,1,0.215,0.0318,0.1526,0.2774,45.67,<.0001
Eta,61-,1,0.1164,0.0336,0.0505,0.1824,11.98,0.0005
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.2159,0.0259,-0.2666,-0.1652,69.74,<.0001

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,7,598.63,<.0001
Potkil,1,67.96,<.0001
Massa,3,18.65,0.0003
Bendie,1,231.49,<.0001
Capoluogo,1,163.48,<.0001
Prov,11,403.2,<.0001


In [73]:
/* rimuovo massa */
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil bendie capoluogo prov(ref='prov1');
    model nsincum = eta potkil bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        type3; 
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,8,18-22 23-26 27-30 31-34 44-51 52-60 61- 35-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,12,prov10 prov11 prov12 prov2 prov3 prov4 prov5 prov6 prov7 prov8 prov9 prov1

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,19393.6287,0.7395
Scaled Deviance,26000.0,19393.6287,0.7395
Pearson Chi-Square,26000.0,31381.2516,1.1965
Scaled Pearson X2,26000.0,31381.2516,1.1965
Log Likelihood,,-11026.6195,
Full Log Likelihood,,-17756.2794,
AIC (smaller is better),,35556.5588,
AICC (smaller is better),,35556.5974,
BIC (smaller is better),,35736.4173,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.8533,0.0439,-1.9394,-1.7672,1780.63,<.0001
Eta,18-22,1,0.8516,0.0376,0.7778,0.9253,512.06,<.0001
Eta,23-26,1,0.5255,0.0338,0.4592,0.5918,241.26,<.0001
Eta,27-30,1,0.228,0.0349,0.1596,0.2965,42.61,<.0001
Eta,31-34,1,0.0976,0.0363,0.0265,0.1687,7.24,0.0071
Eta,44-51,1,0.1832,0.031,0.1225,0.244,34.93,<.0001
Eta,52-60,1,0.2165,0.0318,0.1541,0.2788,46.32,<.0001
Eta,61-,1,0.1192,0.0336,0.0532,0.1851,12.55,0.0004
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.1544,0.019,-0.1917,-0.1171,65.89,<.0001

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,7,620.66,<.0001
Potkil,1,64.81,<.0001
Bendie,1,230.93,<.0001
Capoluogo,1,163.67,<.0001
Prov,11,407.01,<.0001


In [74]:
/* Province */
/* Accetto H0 -> Accorpo i livelli */
ods select contrasts;
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil massa bendie capoluogo prov(ref='prov1');
    model nsincum = eta potkil massa bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        type3;
    contrast "province" prov 1  0 -1  0  0  0  0  0  0  0  0  0, /* prov10-prov12 */
                        prov 0  0  0  1  0  0  0  0  0  0 -1  0, /* prov2-prov9 */
                        prov 0  0  0  0  1  0  0  0  0  0  0 -1; /* prov1-prov3 */
run;

Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results
Contrast,DF,Chi-Square,Pr > ChiSq,Type
province,3,7.56,0.0561,LR


In [75]:
/* Accorpo le province */
/* prov */
proc format;
    value $classprov
        "RC","VT","CN","OR","RA","VC","FE","LC","SS","VV","BZ","PV","RG","CH","PG","AR","TE","TN","VR","EN","NO","SV","VI","MC","MT","RE","BS","RI","AG","AV","PC" = "prov1-3"
        "BN","PO","IM","BA","PT","TA","SP","CA","ROMA","PA" = "prov2-9"
        "AL","SO","GO","LT","PR","PN","GR","UD","TV","PD" = "prov4"
        "SI","TR","AP","LO","FG","ME","LU","CZ","FR","CT","VE","BG","IS","MO","TP" = "prov5"
        "CL","PI","VA","BR","FO","LI","SR","CO","SA" = "prov6"
        "LE","RN","AN","MS","BO","GE","TS" = "prov7"
        "CB","FI","NU","PE","TO","MI" = "prov8"
        "BL","PZ","BI","CR","CS","AT","MN","PS","AQ","RSM","VB","RO" = "prov10-12"
        "AO","NA","CE","KR" = "prov11";
run;

proc print data = polizze (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,espo,nsin,dannotot,freqsin,lnespo
1,F,35-43,prov8,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
2,M,35-43,prov5,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
3,M,44-51,prov7,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
4,F,18-22,prov10-12,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
5,M,44-51,prov2-9,NO,B,-57,-1030,8-13,0.2,0,0.0,0,-1.60944
6,F,44-51,prov8,NO,B,-57,-1030,14-15,1.0,0,0.0,0,0.0
7,M,35-43,prov1-3,NO,D,-57,1031-1379,17-21,1.0,0,0.0,0,0.0
8,M,61-,prov1-3,SI,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
9,F,44-51,prov1-3,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
10,M,52-60,prov1-3,NO,B,58-,1031-1379,17-21,1.0,1,359.057,1,0.0


In [76]:
/* Modello di Poisson definitivo */
proc genmod data = polizzecum plots = stdreschi;
    class eta(ref='35-43') potkil bendie capoluogo prov(ref='prov1-3');
    model nsincum = eta potkil bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,8,18-22 23-26 27-30 31-34 44-51 52-60 61- 35-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,9,prov10-12 prov11 prov2-9 prov4 prov5 prov6 prov7 prov8 prov1-3

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,19401.1531,0.7397
Scaled Deviance,26000.0,19401.1531,0.7397
Pearson Chi-Square,26000.0,31341.9422,1.1949
Scaled Pearson X2,26000.0,31341.9422,1.1949
Log Likelihood,,-11030.3817,
Full Log Likelihood,,-17760.0417,
AIC (smaller is better),,35558.0833,
AICC (smaller is better),,35558.1123,
BIC (smaller is better),,35713.4156,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.8338,0.0417,-1.9156,-1.752,1930.62,<.0001
Eta,18-22,1,0.8517,0.0376,0.778,0.9255,512.26,<.0001
Eta,23-26,1,0.5256,0.0338,0.4593,0.5919,241.34,<.0001
Eta,27-30,1,0.228,0.0349,0.1595,0.2964,42.58,<.0001
Eta,31-34,1,0.0974,0.0363,0.0263,0.1685,7.22,0.0072
Eta,44-51,1,0.1833,0.031,0.1225,0.244,34.94,<.0001
Eta,52-60,1,0.2171,0.0318,0.1547,0.2794,46.57,<.0001
Eta,61-,1,0.1193,0.0336,0.0533,0.1852,12.57,0.0004
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.1546,0.019,-0.1919,-0.1173,66.05,<.0001

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,7,620.9,<.0001
Potkil,1,64.97,<.0001
Bendie,1,230.67,<.0001
Capoluogo,1,164.52,<.0001
Prov,8,399.48,<.0001


In [77]:
/* Modello di Poisson definitivo */
proc genmod data = polizzecum plots = stdreschi(xbeta);
    class eta(ref='35-43') potkil bendie capoluogo prov(ref='prov1-3');
    model nsincum = eta potkil bendie capoluogo prov /
        dist = poisson
        offset = lnespocum;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,8,18-22 23-26 27-30 31-34 44-51 52-60 61- 35-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,9,prov10-12 prov11 prov2-9 prov4 prov5 prov6 prov7 prov8 prov1-3

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,19401.1531,0.7397
Scaled Deviance,26000.0,19401.1531,0.7397
Pearson Chi-Square,26000.0,31341.9422,1.1949
Scaled Pearson X2,26000.0,31341.9422,1.1949
Log Likelihood,,-11030.3817,
Full Log Likelihood,,-17760.0417,
AIC (smaller is better),,35558.0833,
AICC (smaller is better),,35558.1123,
BIC (smaller is better),,35713.4156,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.8338,0.0417,-1.9156,-1.752,1930.62,<.0001
Eta,18-22,1,0.8517,0.0376,0.778,0.9255,512.26,<.0001
Eta,23-26,1,0.5256,0.0338,0.4593,0.5919,241.34,<.0001
Eta,27-30,1,0.228,0.0349,0.1595,0.2964,42.58,<.0001
Eta,31-34,1,0.0974,0.0363,0.0263,0.1685,7.22,0.0072
Eta,44-51,1,0.1833,0.031,0.1225,0.244,34.94,<.0001
Eta,52-60,1,0.2171,0.0318,0.1547,0.2794,46.57,<.0001
Eta,61-,1,0.1193,0.0336,0.0533,0.1852,12.57,0.0004
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.1546,0.019,-0.1919,-0.1173,66.05,<.0001


#### 3.A.1.2 Modello di Poisson con sovradispersione

In [78]:
/* Modello con dati raggruppati */
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil bendie capoluogo prov(ref='prov1-3');
    model nsincum = eta potkil bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        scale = pearson
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,8,18-22 23-26 27-30 31-34 44-51 52-60 61- 35-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,9,prov10-12 prov11 prov2-9 prov4 prov5 prov6 prov7 prov8 prov1-3

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,19401.1531,0.7397
Scaled Deviance,26000.0,16236.7808,0.619
Pearson Chi-Square,26000.0,31341.9422,1.1949
Scaled Pearson X2,26000.0,26230.0,1.0
Log Likelihood,,-9231.3013,
Full Log Likelihood,,-17760.0417,
AIC (smaller is better),,35558.0833,
AICC (smaller is better),,35558.1123,
BIC (smaller is better),,35713.4156,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.8338,0.0456,-1.9233,-1.7444,1615.73,<.0001
Eta,18-22,1,0.8517,0.0411,0.7711,0.9323,428.71,<.0001
Eta,23-26,1,0.5256,0.037,0.4531,0.5981,201.98,<.0001
Eta,27-30,1,0.228,0.0382,0.1531,0.3028,35.63,<.0001
Eta,31-34,1,0.0974,0.0397,0.0197,0.1752,6.04,0.0140
Eta,44-51,1,0.1833,0.0339,0.1169,0.2497,29.24,<.0001
Eta,52-60,1,0.2171,0.0348,0.1489,0.2852,38.97,<.0001
Eta,61-,1,0.1193,0.0368,0.0472,0.1913,10.52,0.0012
Eta,35-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.1546,0.0208,-0.1953,-0.1138,55.28,<.0001

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,Num DF,Den DF,F Value,Pr > F,Chi-Square,Pr > ChiSq
Eta,7,26230,74.23,<.0001,519.63,<.0001
Potkil,1,26230,54.38,<.0001,54.38,<.0001
Bendie,1,26230,193.05,<.0001,193.05,<.0001
Capoluogo,1,26230,137.69,<.0001,137.69,<.0001
Prov,8,26230,41.79,<.0001,334.33,<.0001


In [79]:
/* Modello con dati raggruppati */
/* Accorpo ulteriormente i livelli di eta */
ods select contrasts;
proc genmod data = polizzecum;
    class eta(ref='35-43') potkil bendie capoluogo prov(ref='prov1-3');
    model nsincum = eta potkil bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        scale = pearson
        type3;
    contrast "eta" eta 0  0  0  1  0  0  0 -1; /* 31-34 con 35-43 */
                   /*eta 0  0  0  0  1 -1  0  0,*/ /* 44-51 con 52-60 */
                   /*eta 0  0  0  0  0  1 -1  0;*/ /* 52-60 con 61- */
run;

Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results
Contrast,Num DF,Den DF,F Value,Pr > F,Chi-Square,Pr > ChiSq,Type
eta,1,26230,5.98,0.0145,5.98,0.0145,LR


In [80]:
/* Accorpo i livelli di eta */

/* eta */
proc format;
    value classeta
        low-22 = "18-22"
        23-26 = "23-26"
        27-30 = "27-30"
        31-43 = "31-43"
        44-51 = "44-51"
        52-60 = "52-60"
        61-high = "61-";
run;

proc print data = polizze (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,espo,nsin,dannotot,freqsin,lnespo
1,F,31-43,prov8,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
2,M,31-43,prov5,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
3,M,44-51,prov7,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
4,F,18-22,prov10-12,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
5,M,44-51,prov2-9,NO,B,-57,-1030,8-13,0.2,0,0.0,0,-1.60944
6,F,44-51,prov8,NO,B,-57,-1030,14-15,1.0,0,0.0,0,0.0
7,M,31-43,prov1-3,NO,D,-57,1031-1379,17-21,1.0,0,0.0,0,0.0
8,M,61-,prov1-3,SI,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
9,F,44-51,prov1-3,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
10,M,52-60,prov1-3,NO,B,58-,1031-1379,17-21,1.0,1,359.057,1,0.0


In [81]:
/* Modello di Poisson con sovradispersione definitivo */
proc genmod data = polizzecum plots = stdreschi(xbeta);
    class eta(ref='31-43') potkil bendie capoluogo prov(ref='prov1-3');
    model nsincum = eta potkil bendie capoluogo prov /
        dist = poisson
        offset = lnespocum
        scale = pearson
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZECUM
Distribution,Poisson
Link Function,Log
Dependent Variable,nsincum
Offset Variable,lnespocum

0,1
Number of Observations Read,26249
Number of Observations Used,26249

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,7,18-22 23-26 27-30 44-51 52-60 61- 31-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,9,prov10-12 prov11 prov2-9 prov4 prov5 prov6 prov7 prov8 prov1-3

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,26000.0,19408.301,0.7399
Scaled Deviance,26000.0,16187.7678,0.6171
Pearson Chi-Square,26000.0,31449.6198,1.1989
Scaled Pearson X2,26000.0,26231.0,1.0
Log Likelihood,,-9203.0267,
Full Log Likelihood,,-17763.6156,
AIC (smaller is better),,35563.2312,
AICC (smaller is better),,35563.2573,
BIC (smaller is better),,35710.3881,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.7995,0.0434,-1.8845,-1.7145,1722.27,<.0001
Eta,18-22,1,0.8176,0.0386,0.7419,0.8933,448.06,<.0001
Eta,23-26,1,0.4915,0.0342,0.4245,0.5584,207.06,<.0001
Eta,27-30,1,0.1939,0.0355,0.1244,0.2634,29.88,<.0001
Eta,44-51,1,0.1492,0.0308,0.0889,0.2095,23.50,<.0001
Eta,52-60,1,0.183,0.0317,0.1208,0.2452,33.23,<.0001
Eta,61-,1,0.0852,0.0339,0.0187,0.1517,6.31,0.0120
Eta,31-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.1541,0.0208,-0.195,-0.1133,54.76,<.0001
Potkil,58-,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,Num DF,Den DF,F Value,Pr > F,Chi-Square,Pr > ChiSq
Eta,6,26231,85.32,<.0001,511.91,<.0001
Potkil,1,26231,53.87,<.0001,53.87,<.0001
Bendie,1,26231,192.73,<.0001,192.73,<.0001
Capoluogo,1,26231,137.32,<.0001,137.32,<.0001
Prov,8,26231,41.63,<.0001,333.02,<.0001


#### 3.A.1.3 Modello Binomiale Negativa

In [82]:
proc print data = polizze (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,espo,nsin,dannotot,freqsin,lnespo
1,F,31-43,prov8,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
2,M,31-43,prov5,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
3,M,44-51,prov7,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
4,F,18-22,prov10-12,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
5,M,44-51,prov2-9,NO,B,-57,-1030,8-13,0.2,0,0.0,0,-1.60944
6,F,44-51,prov8,NO,B,-57,-1030,14-15,1.0,0,0.0,0,0.0
7,M,31-43,prov1-3,NO,D,-57,1031-1379,17-21,1.0,0,0.0,0,0.0
8,M,61-,prov1-3,SI,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
9,F,44-51,prov1-3,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
10,M,52-60,prov1-3,NO,B,58-,1031-1379,17-21,1.0,1,359.057,1,0.0


In [83]:
/* Stima preliminare di alpha */
/*ods exclude all;*/ /* Non stampo nell'output i risultati della proc genmod */
ods select ModelInfo;
proc genmod data = polizze;
    output out = stime pred = nsinatt;
    class eta(ref='31-43') potkil bendie capoluogo prov(ref='prov1-3');
    model nsin = eta potkil bendie capoluogo prov /
        dist = poisson
        offset = lnespo;
run;
/*ods exclude none;*/

Model Information,Model Information.1
Data Set,WORK.POLIZZE
Distribution,Poisson
Link Function,Log
Dependent Variable,nsin
Offset Variable,lnespo


In [84]:
proc print data = stime (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,espo,nsin,dannotot,freqsin,lnespo,nsinatt
1,F,31-43,prov8,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0,0.087
2,M,31-43,prov5,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0,0.07387
3,M,44-51,prov7,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0,0.11133
4,F,18-22,prov10-12,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0,0.11383
5,M,44-51,prov2-9,NO,B,-57,-1030,8-13,0.2,0,0.0,0,-1.60944,0.02238
6,F,44-51,prov8,NO,B,-57,-1030,14-15,1.0,0,0.0,0,0.0,0.101
7,M,31-43,prov1-3,NO,D,-57,1031-1379,17-21,1.0,0,0.0,0,0.0,0.10671
8,M,61-,prov1-3,SI,B,58-,-1030,14-15,1.0,0,0.0,0,0.0,0.11323
9,F,44-51,prov1-3,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0,0.09087
10,M,52-60,prov1-3,NO,B,58-,1031-1379,17-21,1.0,1,359.057,1,0.0,0.09399


In [85]:
/* Stimatore di Cameron Trivedi */
/* Stimatore di Pinquet */
data alpha_dataset;
    set stime;
    
    /* Stimatore di Cameron Trievedi */
    add = ((nsin - nsinatt)**2 - nsinatt) / nsinatt**2;
    
    /* Stimatore di Pinquet */
    alphanum = nsinatt**2;
    alphaden = (nsin - nsinatt)**2 - nsinatt;
    
    keep nsincum nsinatt add alphanum alphaden;
run;

proc means data = alpha_dataset;
    var add alphanum alphaden;
    output out = alpha_sum sum = sadd salphanum salphaden;
run;

proc print data = alpha_sum (obs = 10);
run;

Variable,N,Mean,Std Dev,Minimum,Maximum
add alphanum alphaden,172161 172161 172161,37.9694385 0.0073439 0.0057876,14556.53 0.0087395 0.3862777,-6631.66 2.273133E-8 -0.3115638,5431253.23 0.2822487 23.8076966

Obs,_TYPE_,_FREQ_,sadd,salphanum,salphaden
1,0,172161,6536856.5,1264.34,996.392


In [86]:
data alpha;
    set alpha_sum;
    
    /* Cameron Trivedi */
    alpha_ct = (sadd / (_FREQ_ - 17))**(-1);
    
    /* Pinquet */
    alpha_pinquet = salphanum / salphaden;
    
    keep alpha_ct alpha_pinquet;
run;

proc print data = alpha noobs;
run;

alpha_ct,alpha_pinquet
0.026334,1.26892


In [87]:
/* Modello Binomiale Negativa con alpha stimato tramite lo stimatore di Pinquet */
proc genmod data = polizze plots = stdreschi(xbeta);
    class eta(ref='31-43') potkil bendie capoluogo prov(ref='prov1-3');
    
    alpha = 1.26892;
    mu = _MEAN_;
    y = _RESP_;
    
    variance var = mu + mu**2 / alpha;
    
    if y > 0 then
        d = 2 * (y * log(y / mu) - (alpha + y) * log((alpha + y) / (alpha + mu)));
    else if y = 0 then
        d = 2 * alpha * log(1 + mu / alpha);
    
    deviance dev = d;
    
    model nsin = eta potkil bendie capoluogo prov /
        link = log
        offset = lnespo;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZE
Distribution,User
Link Function,Log
Dependent Variable,nsin
Offset Variable,lnespo

0,1
Number of Observations Read,172161
Number of Observations Used,172161

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,7,18-22 23-26 27-30 44-51 52-60 61- 31-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,9,prov10-12 prov11 prov2-9 prov4 prov5 prov6 prov7 prov8 prov1-3

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,170000.0,57559.8543,0.3344
Scaled Deviance,170000.0,57559.8543,0.3344
Pearson Chi-Square,170000.0,192330.8433,1.1173
Scaled Pearson X2,170000.0,192330.8433,1.1173
Log Likelihood,,-28779.9272,
Full Log Likelihood,,-28779.9272,
AIC (smaller is better),,57595.8543,
AICC (smaller is better),,57595.8583,
BIC (smaller is better),,57776.8657,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.793,0.0414,-1.8741,-1.7118,1875.28,<.0001
Eta,18-22,1,0.8233,0.0371,0.7506,0.8961,491.88,<.0001
Eta,23-26,1,0.4948,0.0325,0.4311,0.5585,231.65,<.0001
Eta,27-30,1,0.1941,0.0335,0.1284,0.2597,33.54,<.0001
Eta,44-51,1,0.1499,0.0291,0.093,0.2069,26.60,<.0001
Eta,52-60,1,0.1853,0.03,0.1265,0.2441,38.11,<.0001
Eta,61-,1,0.0856,0.032,0.0229,0.1483,7.16,0.0075
Eta,31-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.1558,0.0197,-0.1945,-0.1171,62.26,<.0001
Potkil,58-,0,0.0,0.0,0.0,0.0,.,.


In [88]:
/* Modello Binomiale Negativa con alpha stimato tramite la massima verosimiglianza */
proc genmod data = polizze plots = stdreschi(xbeta);
    class eta(ref='31-43') potkil bendie capoluogo prov(ref='prov1-3');
      
    model nsin = eta potkil bendie capoluogo prov /
        dist = negbin
        link = log
        offset = lnespo;
run;

Model Information,Model Information.1
Data Set,WORK.POLIZZE
Distribution,Negative Binomial
Link Function,Log
Dependent Variable,nsin
Offset Variable,lnespo

0,1
Number of Observations Read,172161
Number of Observations Used,172161

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,7,18-22 23-26 27-30 44-51 52-60 61- 31-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,9,prov10-12 prov11 prov2-9 prov4 prov5 prov6 prov7 prov8 prov1-3

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,170000.0,57409.2051,0.3335
Scaled Deviance,170000.0,57409.2051,0.3335
Pearson Chi-Square,170000.0,192096.0927,1.1159
Scaled Pearson X2,170000.0,192096.0927,1.1159
Log Likelihood,,-44017.1675,
Full Log Likelihood,,-44721.6472,
AIC (smaller is better),,89481.2943,
AICC (smaller is better),,89481.2987,
BIC (smaller is better),,89672.3618,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,-1.7928,0.0414,-1.8741,-1.7116,1871.02,<.0001
Eta,18-22,1,0.8235,0.0372,0.7506,0.8963,490.85,<.0001
Eta,23-26,1,0.4949,0.0325,0.4311,0.5586,231.28,<.0001
Eta,27-30,1,0.1941,0.0335,0.1283,0.2598,33.49,<.0001
Eta,44-51,1,0.1499,0.0291,0.0929,0.207,26.56,<.0001
Eta,52-60,1,0.1853,0.03,0.1265,0.2442,38.07,<.0001
Eta,61-,1,0.0856,0.032,0.0228,0.1484,7.15,0.0075
Eta,31-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.1559,0.0198,-0.1946,-0.1171,62.18,<.0001
Potkil,58-,0,0.0,0.0,0.0,0.0,.,.


### 3.A.2 Modelli per il danno per sinistro

#### Preparazione dei dati

In [89]:
data danni;
    set dati.danni;
run;

proc print data = danni (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,danno
1,M,55,AR,NO,B,80,1180,18,359.06
2,M,56,TE,NO,B,55,1120,17,1617.55
3,F,33,VC,NO,B,36,795,14,179.53
4,F,56,BL,NO,B,66,1050,17,691.19
5,M,73,MS,NO,B,36,765,13,1617.55
6,M,61,VI,NO,B,77,1200,18,1201.05
7,F,31,BA,NO,B,52,990,15,538.59
8,M,37,VR,NO,B,103,1420,20,359.06
9,F,53,FO,NO,B,25,700,10,6059.09
10,M,70,FI,NO,B,65,1010,17,771.97


In [90]:
/* Applico la partizione in livelli definita precedentemente */
data danni;
    set danni;
    format prov $classprov.;
    format eta classeta.;
    format potf classpotf.;
    format potkil classpotkil.;
    format massa classmassa.;
run;

proc print data = danni (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,danno
1,M,52-60,prov1-3,NO,B,58-,1031-1379,17-21,359.06
2,M,52-60,prov1-3,NO,B,-57,1031-1379,17-21,1617.55
3,F,31-43,prov1-3,NO,B,-57,-1030,14-15,179.53
4,F,52-60,prov10-12,NO,B,58-,1031-1379,17-21,691.19
5,M,61-,prov7,NO,B,-57,-1030,8-13,1617.55
6,M,61-,prov1-3,NO,B,58-,1031-1379,17-21,1201.05
7,F,31-43,prov2-9,NO,B,-57,-1030,14-15,538.59
8,M,31-43,prov1-3,NO,B,58-,1380-1424,17-21,359.06
9,F,52-60,prov6,NO,B,-57,-1030,8-13,6059.09
10,M,61-,prov8,NO,B,58-,-1030,17-21,771.97


In [91]:
/* Dataset per modelli con dati raggruppati */
proc means data = danni nway noprint;
    class sesso capoluogo bendie prov eta potf potkil massa;
    var danno;
    output out = dannicum sum = dannocum;
run;

data dannicum;
    set dannicum;
    dannocummed = dannocum / _FREQ_;
    
    rename _FREQ_ = nsin;
    drop _TYPE_;
run;

proc print data = dannicum (obs = 10);
run;

Obs,Sesso,Capoluogo,Bendie,Prov,Eta,potf,Potkil,Massa,nsin,dannocum,dannocummed
1,F,NO,B,prov1-3,18-22,8-13,-57,-1030,46,128088.48,2784.53
2,F,NO,B,prov1-3,18-22,8-13,58-,-1030,1,987.41,987.41
3,F,NO,B,prov1-3,18-22,14-15,-57,-1030,16,54461.79,3403.86
4,F,NO,B,prov1-3,18-22,14-15,58-,-1030,2,2441.95,1220.97
5,F,NO,B,prov1-3,18-22,16,-57,-1030,2,4372.42,2186.21
6,F,NO,B,prov1-3,18-22,16,58-,-1030,1,10322.89,10322.89
7,F,NO,B,prov1-3,18-22,17-21,-57,1031-1379,1,556.54,556.54
8,F,NO,B,prov1-3,18-22,17-21,58-,1031-1379,2,3551.08,1775.54
9,F,NO,B,prov1-3,23-26,8-13,-57,-1030,62,151011.33,2435.67
10,F,NO,B,prov1-3,23-26,14-15,-57,-1030,26,33844.04,1301.69


In [92]:
/* Il dataset dannicum ha 2 249 righe */
proc summary data = dannicum;
    output out = conta_righe;
run;

proc print data = conta_righe;
run;

Obs,_TYPE_,_FREQ_
1,0,2249


#### 3.A.2.1 Modello Gamma

In [93]:
/* Modello con dati individuali */
proc genmod data = danni;
    class eta(ref='31-43') potkil bendie capoluogo prov(ref='prov1-3');
    model danno = eta potkil bendie capoluogo prov /
        dist = gamma
        link = log
        type3;
run;

Model Information,Model Information.1
Data Set,WORK.DANNI
Distribution,Gamma
Link Function,Log
Dependent Variable,danno

0,1
Number of Observations Read,12691
Number of Observations Used,12691

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,7,18-22 23-26 27-30 44-51 52-60 61- 31-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,9,prov10-12 prov11 prov2-9 prov4 prov5 prov6 prov7 prov8 prov1-3

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,13000.0,17803.3419,1.4048
Scaled Deviance,13000.0,14967.0467,1.181
Pearson Chi-Square,13000.0,114315.2165,9.0204
Scaled Pearson X2,13000.0,96103.3714,7.5833
Log Likelihood,,-113894.6875,
Full Log Likelihood,,-113894.6875,
AIC (smaller is better),,227827.3751,
AICC (smaller is better),,227827.435,
BIC (smaller is better),,227968.8994,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,7.857,0.0436,7.7715,7.9425,32459.9,<.0001
Eta,18-22,1,0.2319,0.0386,0.1563,0.3075,36.13,<.0001
Eta,23-26,1,0.163,0.0341,0.0961,0.2299,22.82,<.0001
Eta,27-30,1,0.106,0.0355,0.0364,0.1757,8.90,0.0028
Eta,44-51,1,-0.1213,0.0307,-0.1815,-0.061,15.57,<.0001
Eta,52-60,1,-0.0216,0.0317,-0.0838,0.0405,0.47,0.4950
Eta,61-,1,-0.0662,0.0341,-0.1329,0.0006,3.77,0.0522
Eta,31-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.2019,0.0208,-0.2427,-0.1611,94.08,<.0001
Potkil,58-,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,6,123.98,<.0001
Potkil,1,95.58,<.0001
Bendie,1,16.69,<.0001
Capoluogo,1,22.78,<.0001
Prov,8,141.13,<.0001


In [94]:
/* Modello con dati raggruppati */
proc genmod data = dannicum;
    class eta(ref='31-43') potkil bendie capoluogo prov(ref='prov1-3');
    model dannocummed = eta potkil bendie capoluogo prov /
        dist = gamma
        link = log
        type3;
    weight nsin;
run;

Model Information,Model Information.1
Data Set,WORK.DANNICUM
Distribution,Gamma
Link Function,Log
Dependent Variable,dannocummed
Scale Weight Variable,nsin

0,1
Number of Observations Read,2249
Number of Observations Used,2249
Sum of Weights,12691

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,7,18-22 23-26 27-30 44-51 52-60 61- 31-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,9,prov10-12 prov11 prov2-9 prov4 prov5 prov6 prov7 prov8 prov1-3

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,2231.0,5247.068,2.3519
Scaled Deviance,2231.0,2607.1966,1.1686
Pearson Chi-Square,2231.0,14042.0724,6.2941
Scaled Pearson X2,2231.0,6977.3144,3.1274
Log Likelihood,,-20173.9912,
Full Log Likelihood,,-20173.9912,
AIC (smaller is better),,40385.9824,
AICC (smaller is better),,40386.3234,
BIC (smaller is better),,40494.629,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,7.857,0.0567,7.7458,7.9682,19185.3,<.0001
Eta,18-22,1,0.2319,0.0502,0.1335,0.3302,21.35,<.0001
Eta,23-26,1,0.163,0.0444,0.076,0.25,13.49,0.0002
Eta,27-30,1,0.106,0.0462,0.0154,0.1967,5.26,0.0218
Eta,44-51,1,-0.1213,0.04,-0.1996,-0.0429,9.20,0.0024
Eta,52-60,1,-0.0216,0.0413,-0.1025,0.0592,0.28,0.5999
Eta,61-,1,-0.0662,0.0443,-0.153,0.0207,2.23,0.1355
Eta,31-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.2019,0.0271,-0.255,-0.1489,55.61,<.0001
Potkil,58-,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,6,72.63,<.0001
Potkil,1,56.11,<.0001
Bendie,1,9.85,0.0017
Capoluogo,1,13.44,0.0002
Prov,8,82.57,<.0001


In [95]:
/* Testo accorpamenti eta */
ods select contrasts;
proc genmod data = dannicum;
    class eta(ref='31-43') potkil bendie capoluogo prov(ref='prov1-3');
    model dannocummed = eta potkil bendie capoluogo prov /
        dist = gamma
        link = log
        type3;
    weight nsin;
    contrast "eta" eta  1 -1  0  0  0  0  0, /* 18-22 con 23-26 */
                   eta  0  0  1  0  0  0 -1, /* 27-30 con 31-43 */
                   eta  0  0  0  1 -1  0  0, /* 44-51 con 52-60 */
                   eta  0  0  0  0  1 -1  0; /* 52-60 con 61- */
                   
run;

Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results
Contrast,DF,Chi-Square,Pr > ChiSq,Type
eta,4,11.63,0.0203,LR


In [96]:
/* Ulteriori accorpamenti di variabili */
/* eta */
proc format;
    value classeta
        low-26 = "18-26"
        27-43 = "27-43"
        44-high = "44-";
run;

proc print data = polizze (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,espo,nsin,dannotot,freqsin,lnespo
1,F,27-43,prov8,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
2,M,27-43,prov5,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
3,M,44-,prov7,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
4,F,18-26,prov10-12,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
5,M,44-,prov2-9,NO,B,-57,-1030,8-13,0.2,0,0.0,0,-1.60944
6,F,44-,prov8,NO,B,-57,-1030,14-15,1.0,0,0.0,0,0.0
7,M,27-43,prov1-3,NO,D,-57,1031-1379,17-21,1.0,0,0.0,0,0.0
8,M,44-,prov1-3,SI,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
9,F,44-,prov1-3,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
10,M,44-,prov1-3,NO,B,58-,1031-1379,17-21,1.0,1,359.057,1,0.0


In [97]:
proc genmod data = dannicum;
    class eta(ref='27-43') potkil bendie capoluogo prov(ref='prov1-3');
    model dannocummed = eta potkil bendie capoluogo prov /
        dist = gamma
        link = log
        type3;
    weight nsin;   
run;

Model Information,Model Information.1
Data Set,WORK.DANNICUM
Distribution,Gamma
Link Function,Log
Dependent Variable,dannocummed
Scale Weight Variable,nsin

0,1
Number of Observations Read,2249
Number of Observations Used,2249
Sum of Weights,12691

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,3,18-26 44- 27-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,9,prov10-12 prov11 prov2-9 prov4 prov5 prov6 prov7 prov8 prov1-3

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,2235.0,5270.5175,2.3582
Scaled Deviance,2235.0,2608.3318,1.167
Pearson Chi-Square,2235.0,14183.7232,6.3462
Scaled Pearson X2,2235.0,7019.3974,3.1407
Log Likelihood,,-20179.8054,
Full Log Likelihood,,-20179.8054,
AIC (smaller is better),,40389.6107,
AICC (smaller is better),,40389.8257,
BIC (smaller is better),,40475.3844,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,7.8888,0.0554,7.7803,7.9973,20292.3,<.0001
Eta,18-26,1,0.1602,0.0351,0.0915,0.229,20.87,<.0001
Eta,44-,1,-0.103,0.0286,-0.1589,-0.047,13.00,0.0003
Eta,27-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.203,0.0269,-0.2558,-0.1502,56.84,<.0001
Potkil,58-,0,0.0,0.0,0.0,0.0,.,.
Bendie,B,1,0.1303,0.0411,0.0498,0.2109,10.06,0.0015
Bendie,D,0,0.0,0.0,0.0,0.0,.,.
Capoluogo,NO,1,0.1183,0.0308,0.0579,0.1787,14.72,0.0001
Capoluogo,SI,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,2,61.0,<.0001
Potkil,1,57.39,<.0001
Bendie,1,9.73,0.0018
Capoluogo,1,14.4,0.0001
Prov,8,83.49,<.0001


In [98]:
/* Test per accorpamenti province */
ods select contrasts;
proc genmod data = dannicum;
    class eta(ref='27-43') potkil bendie capoluogo prov(ref='prov1-3');
    model dannocummed = eta potkil bendie capoluogo prov /
        dist = gamma
        link = log;
    weight nsin;
    contrasts "province" prov  0  0  0  0  0  0  1  0 -1, /* prov8 con prov1-3 */
                         prov  0  0  0  0  0  0  0  1 -1, /* prov7 con prov1-3 */
                         prov  0  0  0  1 -1  0  0  0  0, /* prov4 con prov5 */
                         prov  1  0  0  0  0 -1  0  0  0, /* prov6 con prov10-12 */
                         prov  0  1 -1  0  0  0  0  0  0; /* prov11 con prov2-9 */
run;

Contrast Results,Contrast Results,Contrast Results,Contrast Results,Contrast Results
Contrast,DF,Chi-Square,Pr > ChiSq,Type
province,5,1.23,0.9424,LR


In [99]:
/* prov */
proc format;
    value $classprov
        "RC","VT","CN","OR","RA","VC","FE","LC","SS","VV","BZ","PV","RG","CH","PG","AR","TE","TN","VR","EN","NO","SV","VI","MC","MT","RE","BS","RI","AG","AV","PC","LE","RN","AN","MS","BO","GE","TS","CB","FI","NU","PE","TO","MI" = "prov1-3-7-8"
        "BN","PO","IM","BA","PT","TA","SP","CA","ROMA","PA","AO","NA","CE","KR" = "prov2-9-11"
        "AL","SO","GO","LT","PR","PN","GR","UD","TV","PD","SI","TR","AP","LO","FG","ME","LU","CZ","FR","CT","VE","BG","IS","MO","TP" = "prov4-5"
        "CL","PI","VA","BR","FO","LI","SR","CO","SA","BL","PZ","BI","CR","CS","AT","MN","PS","AQ","RSM","VB","RO" = "prov6-10-12"
run;

proc print data = polizze (obs = 10);
run;

Obs,Sesso,Eta,Prov,Capoluogo,Bendie,Potkil,Massa,potf,espo,nsin,dannotot,freqsin,lnespo
1,F,27-43,prov1-3-7-8,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
2,M,27-43,prov4-5,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
3,M,44-,prov1-3-7-8,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
4,F,18-26,prov6-10-12 run,NO,B,-57,-1030,8-13,1.0,0,0.0,0,0.0
5,M,44-,prov2-9-11,NO,B,-57,-1030,8-13,0.2,0,0.0,0,-1.60944
6,F,44-,prov1-3-7-8,NO,B,-57,-1030,14-15,1.0,0,0.0,0,0.0
7,M,27-43,prov1-3-7-8,NO,D,-57,1031-1379,17-21,1.0,0,0.0,0,0.0
8,M,44-,prov1-3-7-8,SI,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
9,F,44-,prov1-3-7-8,NO,B,58-,-1030,14-15,1.0,0,0.0,0,0.0
10,M,44-,prov1-3-7-8,NO,B,58-,1031-1379,17-21,1.0,1,359.057,1,0.0


In [100]:
proc genmod data = dannicum plots = stdreschi(xbeta);
    class eta(ref='27-43') potkil bendie capoluogo prov(ref='prov1-3-7-8');
    model dannocummed = eta potkil bendie capoluogo prov /
        dist = gamma
        link = log
        type3;
    weight nsin;   
run;

Model Information,Model Information.1
Data Set,WORK.DANNICUM
Distribution,Gamma
Link Function,Log
Dependent Variable,dannocummed
Scale Weight Variable,nsin

0,1
Number of Observations Read,2249
Number of Observations Used,2249
Sum of Weights,12691

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
Eta,3,18-26 44- 27-43
Potkil,2,-57 58-
Bendie,2,B D
Capoluogo,2,NO SI
Prov,4,prov2-9-11 prov4-5 prov6-10-12 run prov1-3-7-8

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,2240.0,5272.9949,2.354
Scaled Deviance,2240.0,2608.4516,1.1645
Pearson Chi-Square,2240.0,14116.3074,6.3019
Scaled Pearson X2,2240.0,6983.0724,3.1174
Log Likelihood,,-20180.4183,
Full Log Likelihood,,-20180.4183,
AIC (smaller is better),,40380.8365,
AICC (smaller is better),,40380.9348,
BIC (smaller is better),,40438.0189,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,7.8775,0.0518,7.776,7.979,23119.8,<.0001
Eta,18-26,1,0.1592,0.0351,0.0905,0.2279,20.61,<.0001
Eta,44-,1,-0.1031,0.0285,-0.159,-0.0472,13.07,0.0003
Eta,27-43,0,0.0,0.0,0.0,0.0,.,.
Potkil,-57,1,-0.2028,0.0269,-0.2555,-0.15,56.78,<.0001
Potkil,58-,0,0.0,0.0,0.0,0.0,.,.
Bendie,B,1,0.1324,0.0409,0.0522,0.2125,10.46,0.0012
Bendie,D,0,0.0,0.0,0.0,0.0,.,.
Capoluogo,NO,1,0.1209,0.0305,0.061,0.1807,15.65,<.0001
Capoluogo,SI,0,0.0,0.0,0.0,0.0,.,.

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
Eta,2,60.76,<.0001
Potkil,1,57.34,<.0001
Bendie,1,10.11,0.0015
Capoluogo,1,15.29,<.0001
Prov,3,82.27,<.0001


## 4) Riservazione con i GLM

### Preparazione dei dati

In [101]:
data runoff;
    set dati.runoff;
run;

proc print data = runoff (obs = 10);
run;

Obs,pagamenti,nden,annoacc,annodiff,annopag
1,10775462,32165,0,0,0
2,9159050,32165,0,1,1
3,2581795,32165,0,2,2
4,1380284,32165,0,3,3
5,871723,32165,0,4,4
6,589888,32165,0,5,5
7,1275556,32165,0,6,6
8,1643537,32165,0,7,7
9,522288,32165,0,8,8
10,645271,32165,0,9,9


In [102]:
/*
Riporto:
- pagamentinden: pagamenti incrementali in rapporto al numero di sinistri denunciati
- logpagamenti: logaritmo dei pagamenti
*/
data runoff1;
    set runoff;
    pagamentinden = pagamenti / nden;
    logpagamenti = log(pagamenti);
run;

proc print data = runoff1 (obs = 10);
run;

Obs,pagamenti,nden,annoacc,annodiff,annopag,pagamentinden,logpagamenti
1,10775462,32165,0,0,0,335.006,16.1928
2,9159050,32165,0,1,1,284.752,16.0303
3,2581795,32165,0,2,2,80.267,14.764
4,1380284,32165,0,3,3,42.913,14.1378
5,871723,32165,0,4,4,27.102,13.6782
6,589888,32165,0,5,5,18.339,13.2877
7,1275556,32165,0,6,6,39.657,14.0589
8,1643537,32165,0,7,7,51.097,14.3124
9,522288,32165,0,8,8,16.238,13.166
10,645271,32165,0,9,9,20.061,13.3774


### Modello Poisson-Logaritmo

* Variabili risposta: $Y_i = P_{ij}$ (`pagamenti`), con distribuzione di **Poisson**;
* Variabili esplicative: $i$ (`annoacc`) e $j$ (`annodiff`), di classificazione $\eta_{ij}=\boldsymbol{x}'\beta = \mu + \alpha_i + \beta_j$ ;
* Funzione di collegamento: $g=log$.

In [103]:
proc genmod data = runoff1;
    class annoacc (ref = first) annodiff (ref = first);
    model pagamenti = annoacc annodiff /
        dist = poisson
        link = log
        type3;
run;

Model Information,Model Information.1,Model Information.2
Data Set,WORK.RUNOFF1,
Distribution,Poisson,
Link Function,Log,
Dependent Variable,pagamenti,pagamenti

0,1
Number of Observations Read,105
Number of Observations Used,105

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
annoacc,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0
annodiff,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,78.0,14260424.598,182825.9564
Scaled Deviance,78.0,14260424.598,182825.9564
Pearson Chi-Square,78.0,14447110.935,185219.371
Scaled Pearson X2,78.0,14447110.935,185219.371
Log Likelihood,,9218270255.4,
Full Log Likelihood,,-7131082.019,
AIC (smaller is better),,14262218.039,
AICC (smaller is better),,14262237.675,
BIC (smaller is better),,14262289.696,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,16.1578,0.0002,16.1574,16.1581,7.225E9,<.0001
annoacc,1.0,1,0.1685,0.0002,0.1681,0.169,473998,<.0001
annoacc,2.0,1,0.3099,0.0002,0.3095,0.3104,1688300,<.0001
annoacc,3.0,1,0.1911,0.0002,0.1906,0.1916,605889,<.0001
annoacc,4.0,1,0.2646,0.0002,0.2641,0.265,1191696,<.0001
annoacc,5.0,1,0.4359,0.0002,0.4354,0.4364,3451576,<.0001
annoacc,6.0,1,0.5303,0.0002,0.5299,0.5308,5257320,<.0001
annoacc,7.0,1,0.536,0.0002,0.5355,0.5364,5290452,<.0001
annoacc,8.0,1,0.691,0.0002,0.6906,0.6915,9217844,<.0001
annoacc,9.0,1,0.7318,0.0002,0.7314,0.7323,1.034E7,<.0001

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,DF,Chi-Square,Pr > ChiSq
annoacc,13,36670000.0,<.0001
annodiff,13,494800000.0,<.0001


### Modello Gamma-Logaritmo

* Variabili risposta: $Y_i = P_{ij}$ (`pagamenti`), con distribuzione **gamma**;
* Variabili esplicative: $i$ (`annoacc`) e $j$ (`annodiff`), di classificazione $\eta_{ij}=\boldsymbol{x}'\beta = \mu + \alpha_i + \beta_j$ ;
* Funzione di collegamento: $g=log$.

In [104]:
proc genmod data = runoff1;
    class annoacc (ref = first) annodiff (ref = first);
    model pagamenti = annoacc annodiff /
        dist = gamma
        link = log
        scale = pearson
        type3;
run;

Model Information,Model Information.1,Model Information.2
Data Set,WORK.RUNOFF1,
Distribution,Gamma,
Link Function,Log,
Dependent Variable,pagamenti,pagamenti

0,1
Number of Observations Read,105
Number of Observations Used,105

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
annoacc,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0
annodiff,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,78.0,9.9323,0.1273
Scaled Deviance,78.0,88.4751,1.1343
Pearson Chi-Square,78.0,8.7563,0.1123
Scaled Pearson X2,78.0,78.0,1.0
Log Likelihood,,-1573.3582,
Full Log Likelihood,,-1573.3582,
AIC (smaller is better),,3200.7164,
AICC (smaller is better),,3220.3528,
BIC (smaller is better),,3272.3733,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,16.3646,0.14,16.0903,16.639,13666.4,<.0001
annoacc,1.0,1,0.2396,0.132,-0.0191,0.4983,3.29,0.0695
annoacc,2.0,1,0.2364,0.1377,-0.0334,0.5062,2.95,0.0859
annoacc,3.0,1,-0.0378,0.1415,-0.3151,0.2395,0.07,0.7893
annoacc,4.0,1,-0.0195,0.1477,-0.3091,0.27,0.02,0.8947
annoacc,5.0,1,0.1012,0.1534,-0.1995,0.402,0.44,0.5095
annoacc,6.0,1,0.1889,0.159,-0.1227,0.5005,1.41,0.2348
annoacc,7.0,1,0.337,0.1657,0.0122,0.6617,4.14,0.0420
annoacc,8.0,1,0.4124,0.1759,0.0677,0.7572,5.50,0.0190
annoacc,9.0,1,0.4317,0.1873,0.0646,0.7988,5.31,0.0212

Lagrange Multiplier Statistics,Lagrange Multiplier Statistics,Lagrange Multiplier Statistics
Parameter,Chi-Square,Pr > ChiSq
Scale,1.5689,0.2104

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,Num DF,Den DF,F Value,Pr > F,Chi-Square,Pr > ChiSq
annoacc,13,78,2.52,0.0061,32.8,0.0018
annodiff,13,78,84.19,<.0001,1094.46,<.0001


### Modello di Poisson con Sovradispersione-Logaritmo

* Variabili risposta: $Y_i = P_{ij}$ (`pagamenti`), **Poisson con Sovradispersione**;
* Variabili esplicative: $i$ (`annoacc`) e $j$ (`annodiff`), di classificazione $\eta_{ij}=\boldsymbol{x}'\beta = \mu + \alpha_i + \beta_j$ ;
* Funzione di collegamento: $g=log$.

In [105]:
proc genmod data = runoff1;
    class annoacc (ref = first) annodiff (ref = first);
    model pagamenti = annoacc annodiff /
        dist = poisson
        link = log
        scale = pearson
        type3;
run;

Model Information,Model Information.1,Model Information.2
Data Set,WORK.RUNOFF1,
Distribution,Poisson,
Link Function,Log,
Dependent Variable,pagamenti,pagamenti

0,1
Number of Observations Read,105
Number of Observations Used,105

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
annoacc,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0
annodiff,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,78.0,14260424.598,182825.9564
Scaled Deviance,78.0,76.9921,0.9871
Pearson Chi-Square,78.0,14447110.935,185219.371
Scaled Pearson X2,78.0,78.0,1.0
Log Likelihood,,49769.4718,
Full Log Likelihood,,-7131082.019,
AIC (smaller is better),,14262218.039,
AICC (smaller is better),,14262237.675,
BIC (smaller is better),,14262289.696,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,16.1578,0.0818,15.9974,16.3181,39008.2,<.0001
annoacc,1.0,1,0.1685,0.1054,-0.038,0.375,2.56,0.1097
annoacc,2.0,1,0.3099,0.1027,0.1087,0.5111,9.12,0.0025
annoacc,3.0,1,0.1911,0.1057,-0.016,0.3982,3.27,0.0705
annoacc,4.0,1,0.2646,0.1043,0.0601,0.469,6.43,0.0112
annoacc,5.0,1,0.4359,0.101,0.238,0.6338,18.64,<.0001
annoacc,6.0,1,0.5303,0.0995,0.3352,0.7254,28.38,<.0001
annoacc,7.0,1,0.536,0.1003,0.3394,0.7325,28.56,<.0001
annoacc,8.0,1,0.691,0.098,0.499,0.883,49.77,<.0001
annoacc,9.0,1,0.7318,0.098,0.5398,0.9238,55.81,<.0001

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,Num DF,Den DF,F Value,Pr > F,Chi-Square,Pr > ChiSq
annoacc,13,78,15.23,<.0001,198.0,<.0001
annodiff,13,78,205.48,<.0001,2671.22,<.0001


### Modello di Poisson con Sovradispersione-Logaritmo per pagamenti rapportati a una misura di esposizione

* Variabili risposta: $Y_{ij} = \frac{P_{ij}}{w_{i}}$ pagamenti rapportati al numero di sinistri dell'anno $i$ denunciati nell’anno di accadimento $w_i$, **Poisson con Sovradispersione**, con pesi i numeri di sinistri dell'anno $i$ denunciati nell’anno di accadimento $\omega_i = w_i$;
* Variabili esplicative: $i$ (`annoacc`) e $j$ (`annodiff`), di classificazione $\eta_{ij}=\boldsymbol{x}'\beta = \mu + \alpha_i + \beta_j$ ;
* Funzione di collegamento: $g=log$.

In [106]:
proc genmod data = runoff1;
    class annoacc (ref = first) annodiff (ref = first);
    model pagamentinden = annoacc annodiff /
        dist = poisson
        link = log
        scale = pearson
        type3;
    weight nden;
run;

Model Information,Model Information.1,Model Information.2
Data Set,WORK.RUNOFF1,
Distribution,Poisson,
Link Function,Log,
Dependent Variable,pagamentinden,
Scale Weight Variable,nden,nden

0,1
Number of Observations Read,105
Number of Observations Used,105
Sum of Weights,3019776

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
annoacc,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0
annodiff,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,78.0,14260424.598,182825.9564
Scaled Deviance,78.0,76.9921,0.9871
Pearson Chi-Square,78.0,14447110.935,185219.371
Scaled Pearson X2,78.0,78.0,1.0
Log Likelihood,,16546.6067,
Full Log Likelihood,,-16497154.04,
AIC (smaller is better),,32994362.072,
AICC (smaller is better),,32994381.708,
BIC (smaller is better),,32994433.729,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,5.7791,0.0818,5.6188,5.9395,4990.20,<.0001
annoacc,1.0,1,0.1776,0.1054,-0.0289,0.3841,2.84,0.0919
annoacc,2.0,1,0.3268,0.1027,0.1256,0.528,10.14,0.0015
annoacc,3.0,1,0.2943,0.1057,0.0872,0.5014,7.76,0.0053
annoacc,4.0,1,0.3942,0.1043,0.1897,0.5986,14.28,0.0002
annoacc,5.0,1,0.5141,0.101,0.3162,0.712,25.92,<.0001
annoacc,6.0,1,0.5967,0.0995,0.4016,0.7918,35.94,<.0001
annoacc,7.0,1,0.6257,0.1003,0.4292,0.8223,38.93,<.0001
annoacc,8.0,1,1.0454,0.098,0.8534,1.2374,113.90,<.0001
annoacc,9.0,1,1.0019,0.098,0.8099,1.1939,104.60,<.0001

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,Num DF,Den DF,F Value,Pr > F,Chi-Square,Pr > ChiSq
annoacc,13,78,42.02,<.0001,546.21,<.0001
annodiff,13,78,205.48,<.0001,2671.22,<.0001


### Modello di Poisson-Composto-Logaritmo per pagamenti rapportati a una misura di esposizione

* Variabili risposta: $Y_{ij} = \frac{P_{ij}}{w_{i}}$ pagamenti rapportati al numero di sinistri dell'anno $i$ denunciati nell’anno di accadimento $w_i$, **Poisson-Composta**, con pesi i numeri di sinistri dell'anno $i$ denunciati nell’anno di accadimento $\omega_i = w_i$;
* Variabili esplicative: $i$ (`annoacc`) e $j$ (`annodiff`), di classificazione $\eta_{ij}=\boldsymbol{x}'\beta = \mu + \alpha_i + \beta_j$ ;
* Funzione di collegamento: $g=log$.

In [107]:
proc genmod data = runoff1;
    class annoacc (ref = first) annodiff (ref = first);
    
    csi = 1.01887;
    y = _RESP_;
    mu = _MEAN_;
    
    variance var = mu*csi;
    
    if y = 0 then
        d = 2 * mu**(2 - csi) / (2 - csi);
    else
        d = -2 * (y * (mu**(1 - csi) - y**(1 - csi)) / (1 - csi) - (mu**(2 - csi) - y**(2 - csi)) / (2 - csi));
    
    deviance dev = d;
    
    model pagamentinden = annoacc annodiff /
        link = log
        scale = pearson
        type3;

    weight nden;
run;

Model Information,Model Information.1,Model Information.2
Data Set,WORK.RUNOFF1,
Distribution,User,
Link Function,Log,
Dependent Variable,pagamentinden,
Scale Weight Variable,nden,nden

0,1
Number of Observations Read,105
Number of Observations Used,105
Sum of Weights,3019776

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
annoacc,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0
annodiff,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,78.0,13163753.882,168766.0754
Scaled Deviance,78.0,72.4134,0.9284
Pearson Chi-Square,78.0,14179315.033,181786.0902
Scaled Pearson X2,78.0,78.0,1.0
Log Likelihood,,-36.2067,
Full Log Likelihood,,-36.2067,
AIC (smaller is better),,126.4134,
AICC (smaller is better),,146.0498,
BIC (smaller is better),,198.0704,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,5.7791,0.0818,5.6188,5.9395,4990.30,<.0001
annoacc,1.0,1,0.1776,0.1054,-0.0289,0.3841,2.84,0.0919
annoacc,2.0,1,0.3268,0.1027,0.1256,0.528,10.14,0.0015
annoacc,3.0,1,0.2943,0.1057,0.0872,0.5014,7.76,0.0053
annoacc,4.0,1,0.3942,0.1043,0.1897,0.5986,14.28,0.0002
annoacc,5.0,1,0.5141,0.101,0.3162,0.712,25.92,<.0001
annoacc,6.0,1,0.5967,0.0995,0.4016,0.7918,35.94,<.0001
annoacc,7.0,1,0.6257,0.1003,0.4292,0.8223,38.94,<.0001
annoacc,8.0,1,1.0454,0.098,0.8534,1.2374,113.91,<.0001
annoacc,9.0,1,1.0019,0.098,0.8099,1.1939,104.60,<.0001

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,Num DF,Den DF,F Value,Pr > F,Chi-Square,Pr > ChiSq
annoacc,13,78,38.02,<.0001,494.31,<.0001
annodiff,13,78,189.8,<.0001,2467.38,<.0001


### Modello di Poisson con Sovradispersione-Logaritmo per pagamenti rapportati a una misura di esposizione

* Variabili risposta: $Y_{ij} = \frac{P_{ij}}{w_{i}}$ pagamenti rapportati al numero di sinistri dell'anno $i$ denunciati nell’anno di accadimento $w_i$, **Poisson con Sovradispersione**, con pesi i numeri di sinistri dell'anno $i$ denunciati nell’anno di accadimento $\omega_i = w_i$;
* Variabili esplicative: $i$ (`annoacc`), $j$ (`annodiff`) e $i+j$ (`annopag`), di classificazione $\eta_{ij}=\boldsymbol{x}'\beta = \mu + \alpha_i + \beta_j + \gamma_{i+j}$ ;
* Funzione di collegamento: $g=log$.

In [108]:
proc genmod data = runoff1;
    class annoacc (ref = first) annodiff (ref = first) annopag;
    model pagamentinden = annoacc annodiff annopag /
        dist = poisson
        link = log
        scale = pearson
        type3;
    weight nden;
run;

Model Information,Model Information.1,Model Information.2
Data Set,WORK.RUNOFF1,
Distribution,Poisson,
Link Function,Log,
Dependent Variable,pagamentinden,
Scale Weight Variable,nden,nden

0,1
Number of Observations Read,105
Number of Observations Used,105
Sum of Weights,3019776

Class Level Information,Class Level Information,Class Level Information
Class,Levels,Values
annoacc,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0
annodiff,14,1 2 3 4 5 6 7 8 9 10 11 12 13 0
annopag,14,0 1 2 3 4 5 6 7 8 9 10 11 12 13

Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit,Criteria For Assessing Goodness Of Fit
Criterion,DF,Value,Value/DF
Deviance,66.0,10720784.463,162436.1282
Scaled Deviance,66.0,66.3551,1.0054
Pearson Chi-Square,66.0,10663412.571,161566.8571
Scaled Pearson X2,66.0,66.0,1.0
Log Likelihood,,18979.8945,
Full Log Likelihood,,-14727333.97,
AIC (smaller is better),,29454745.937,
AICC (smaller is better),,29454793.937,
BIC (smaller is better),,29454849.442,

0
Algorithm converged.

Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates,Analysis Of Maximum Likelihood Parameter Estimates
Parameter,Unnamed: 1_level_1,DF,Estimate,Standard Error,Wald 95% Confidence Limits,Wald 95% Confidence Limits.1,Wald Chi-Square,Pr > ChiSq
Intercept,,1,4.8454,1.0388,2.8094,6.8815,21.76,<.0001
annoacc,1.0,1,0.2951,0.1395,0.0218,0.5685,4.48,0.0343
annoacc,2.0,1,0.5029,0.205,0.101,0.9048,6.02,0.0142
annoacc,3.0,1,0.4727,0.2792,-0.0746,1.02,2.87,0.0905
annoacc,4.0,1,0.5844,0.3562,-0.1137,1.2825,2.69,0.1009
annoacc,5.0,1,0.7189,0.4352,-0.134,1.5718,2.73,0.0985
annoacc,6.0,1,0.8375,0.5145,-0.1708,1.8459,2.65,0.1035
annoacc,7.0,1,0.9726,0.5971,-0.1978,2.143,2.65,0.1034
annoacc,8.0,1,1.5515,0.6794,0.2199,2.8831,5.21,0.0224
annoacc,9.0,1,1.6365,0.7632,0.1407,3.1324,4.60,0.0320

LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis,LR Statistics For Type 3 Analysis
Source,Num DF,Den DF,F Value,Pr > F,Chi-Square,Pr > ChiSq
annoacc,12,66,3.79,0.0002,45.53,<.0001
annodiff,12,66,28.48,<.0001,341.81,<.0001
annopag,12,66,1.83,0.0617,21.91,0.0386
