# 기출복원 (Type-3)

## 데이터 로드

In [1]:
import pandas as pd

In [2]:
# Load the weather dataset from the working directory and preview the first rows.
df = pd.read_csv('weather.csv')
df.head()

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,WindSpeed9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
0,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,NW,6.0,...,29,1019.7,1015.0,7,7,14.4,23.6,No,3.6,Yes
1,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,W,4.0,...,36,1012.4,1008.4,5,3,17.5,25.7,Yes,3.6,Yes
2,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,NNE,6.0,...,69,1009.5,1007.2,8,7,15.4,20.2,Yes,39.8,Yes
3,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,W,30.0,...,56,1005.5,1007.0,2,7,13.5,14.1,Yes,2.8,Yes
4,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,ESE,20.0,...,49,1018.3,1018.5,7,7,11.1,15.4,Yes,0.0,No


In [3]:
# Show per-column dtypes and non-null counts to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 22 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MinTemp        366 non-null    float64
 1   MaxTemp        366 non-null    float64
 2   Rainfall       366 non-null    float64
 3   Evaporation    366 non-null    float64
 4   Sunshine       363 non-null    float64
 5   WindGustDir    363 non-null    object 
 6   WindGustSpeed  364 non-null    float64
 7   WindDir9am     335 non-null    object 
 8   WindDir3pm     365 non-null    object 
 9   WindSpeed9am   359 non-null    float64
 10  WindSpeed3pm   366 non-null    int64  
 11  Humidity9am    366 non-null    int64  
 12  Humidity3pm    366 non-null    int64  
 13  Pressure9am    366 non-null    float64
 14  Pressure3pm    366 non-null    float64
 15  Cloud9am       366 non-null    int64  
 16  Cloud3pm       366 non-null    int64  
 17  Temp9am        366 non-null    float64
 18  Temp3pm   

## Multi Linear Regression 모형

- MaxTemp, Sunshine, WindGustSpeed를 가지고 Temp9am의 값을 예측하는 회귀모델 (기출과 동일한 데이터를 찾을 수 없어 의미 있는 변수는 아님)
- 독립변수 : MaxTemp, Sunshine, WindGustSpeed
- 종속변수 : Temp9am

In [4]:
from statsmodels.formula.api import ols
import statsmodels.api as sm

### 회귀 모형 생성 및 확인

In [5]:
# Fit an OLS regression of Temp9am on MaxTemp, Sunshine and WindGustSpeed
# using the formula interface.
# NOTE(review): the summary reports 361 observations although df has 366 rows,
# so rows with missing values in these columns appear to be dropped at fit time.
model = ols(formula='Temp9am ~ MaxTemp + Sunshine + WindGustSpeed', data=df).fit()
# ANOVA table for the fitted model. `typ` is not given, so the library default
# is used (Type I / sequential per the statsmodels docs -- confirm); its PR(>F)
# values are therefore not the same thing as the coefficient t-test p-values.
result = sm.stats.anova_lm(model)

In [6]:
# Display the full regression report: fit statistics, coefficient table and diagnostics.
model.summary()

0,1,2,3
Dep. Variable:,Temp9am,R-squared:,0.827
Model:,OLS,Adj. R-squared:,0.826
Method:,Least Squares,F-statistic:,569.1
Date:,"Sat, 24 Jun 2023",Prob (F-statistic):,1.2599999999999999e-135
Time:,14:50:05,Log-Likelihood:,-819.73
No. Observations:,361,AIC:,1647.0
Df Residuals:,357,BIC:,1663.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-4.2483,0.536,-7.931,0.000,-5.302,-3.195
MaxTemp,0.8046,0.021,38.628,0.000,0.764,0.846
Sunshine,-0.3713,0.040,-9.282,0.000,-0.450,-0.293
WindGustSpeed,0.0753,0.010,7.882,0.000,0.057,0.094

0,1,2,3
Omnibus:,12.597,Durbin-Watson:,1.402
Prob(Omnibus):,0.002,Jarque-Bera (JB):,12.927
Skew:,-0.45,Prob(JB):,0.00156
Kurtosis:,3.226,Cond. No.,204.0


### 회귀계수 확인

In [7]:
# Estimated coefficients (intercept first, then one per predictor), indexed by term name.
model.params

Intercept       -4.248298
MaxTemp          0.804586
Sunshine        -0.371265
WindGustSpeed    0.075322
dtype: float64

In [8]:
# Pull each fitted coefficient out by its term label so the assignment does
# not depend on the order of entries in model.params.
intercept = model.params['Intercept']
coef1 = model.params['MaxTemp']
coef2 = model.params['Sunshine']
coef3 = model.params['WindGustSpeed']

### 결과 예측

- MaxTemp = 24.3, Sunshine = 6.3, WindGustSpeed = 30.0 일 때 Temp9am 값 예측

In [9]:
# Single-row frame holding the predictor values to score; column names must
# match the terms used in the model formula.
xtrain = pd.DataFrame(
    {
        'MaxTemp': [24.3],
        'Sunshine': [6.3],
        'WindGustSpeed': [30.0],
    }
)
xtrain

Unnamed: 0,MaxTemp,Sunshine,WindGustSpeed
0,24.3,6.3,30.0


In [10]:
# Point prediction of Temp9am for the single input row built above.
model.predict(exog=xtrain)

0    15.223839
dtype: float64

### 독립변수 P-Value 확인

In [11]:
# Display the ANOVA table computed earlier with sm.stats.anova_lm(model);
# the PR(>F) column holds the F-test p-value for each term.
result

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
MaxTemp,1.0,8701.80587,8701.80587,1566.433007,1.297567e-132
Sunshine,1.0,437.462935,437.462935,78.748755,3.460182e-17
WindGustSpeed,1.0,345.154432,345.154432,62.13208,3.933379e-14
Residual,357.0,1983.196653,5.555173,,


In [12]:
# Round the ANOVA p-values to four decimal places and keep them in a new column.
# NOTE(review): these are F-test p-values from the ANOVA table; the per-coefficient
# t-test p-values live in model.pvalues -- confirm which one the task asks for.
result['P-Value'] = result['PR(>F)'].round(4)
result

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F),P-Value
MaxTemp,1.0,8701.80587,8701.80587,1566.433007,1.297567e-132,0.0
Sunshine,1.0,437.462935,437.462935,78.748755,3.460182e-17,0.0
WindGustSpeed,1.0,345.154432,345.154432,62.13208,3.933379e-14,0.0
Residual,357.0,1983.196653,5.555173,,,


### 독립변수의 신뢰구간

In [13]:
# 95% confidence interval (alpha=0.05) for each regression coefficient;
# column 0 is the lower bound and column 1 the upper bound.
model.conf_int(alpha=0.05)

Unnamed: 0,0,1
Intercept,-5.301696,-3.1949
MaxTemp,0.763623,0.845549
Sunshine,-0.449926,-0.292604
WindGustSpeed,0.056529,0.094115


### 종속변수 신뢰구간

- obs_ci_lower ~ obs_ci_upper : 개별 관측값에 대한 예측구간 (mean_ci_lower ~ mean_ci_upper 는 평균 반응값의 신뢰구간)

In [14]:
# Build a prediction-results object for the input row so interval estimates
# can be requested at different significance levels.
predictions = model.get_prediction(xtrain)
# alpha=0.05 -> 95% intervals. mean_ci_* bounds the expected (mean) response;
# obs_ci_* bounds a single new observation and is therefore wider.
predictions.summary_frame(alpha=0.05)

Unnamed: 0,mean,mean_se,mean_ci_lower,mean_ci_upper,obs_ci_lower,obs_ci_upper
0,15.223839,0.197971,14.834504,15.613174,10.572282,19.875396


In [15]:
# Same prediction at alpha=0.1, i.e. 90% intervals (narrower than the 95% ones).
predictions.summary_frame(alpha=0.1)

Unnamed: 0,mean,mean_se,mean_ci_lower,mean_ci_upper,obs_ci_lower,obs_ci_upper
0,15.223839,0.197971,14.897359,15.550319,11.323242,19.124436


In [16]:
# Same prediction at alpha=0.01, i.e. 99% intervals (wider than the 95% ones).
predictions.summary_frame(alpha=0.01)

Unnamed: 0,mean,mean_se,mean_ci_lower,mean_ci_upper,obs_ci_lower,obs_ci_upper
0,15.223839,0.197971,14.71116,15.736518,9.098644,21.349035
