In [None]:
# Scatter plot of total payment against number of claims, with a fitted
# linear trend line drawn on top (ci=None turns off the confidence band).
import matplotlib.pyplot as plt
import seaborn as sns

# NOTE(review): the column was spelled 'total_payement_sek', which looks like a
# typo for 'total_payment_sek' - confirm against swedish_motor_insurance.columns.
sns.regplot(x='n_claims', y='total_payment_sek', data=swedish_motor_insurance, ci=None)
plt.show()

In [None]:
# Simple linear regression through the statsmodels formula interface
from statsmodels.formula.api import ols

# Formula reads "response ~ explanatory": price_twd_msq is modelled from
# n_convenience. The model is built and fitted in one chained expression,
# so the name ends up bound to the fitted results object.
mdl_price_vs_conv = ols(
    formula='price_twd_msq ~ n_convenience',
    data=taiwan_real_estate,
).fit()

# Display the estimated coefficients of the fitted model
print(mdl_price_vs_conv.params)

In [None]:
# Visualizing 1 numeric and 1 categorical variable
import matplotlib.pyplot as plt
import seaborn as sns

# One histogram of mass_g per species, with the panels wrapped after two columns.
# The facet-wrapping keyword is `col_wrap`; the previous `col_wraps` is not a
# displot parameter and would be forwarded to the underlying plot call.
sns.displot(data=fish, x='mass_g', col='species', col_wrap=2, bins=9)
plt.show()

# Mean mass by species
summary_stats = fish.groupby('species')['mass_g'].mean()
print(summary_stats)

In [None]:
# Regressing mass_g on the categorical variable species
from statsmodels.formula.api import ols

# First fit: plain "response ~ explanatory" formula
mdl_mass_vs_species = ols(formula='mass_g ~ species', data=fish).fit()
print(mdl_mass_vs_species.params)

# Second fit: "+ 0" asks for a model without an intercept.
# The same name is rebound, so only this second fit remains available afterwards.
mdl_mass_vs_species = ols(formula='mass_g ~ species + 0', data=fish).fit()
print(mdl_mass_vs_species.params)

In [None]:
# Running the model
# All imports this cell needs are listed here so it does not depend on
# another cell having been run first.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.formula.api import ols

mdl_mass_vs_length = ols('mass_g ~ length_cm', data=bream).fit()
print(mdl_mass_vs_length.params)

# Explanatory values to predict for: integer lengths 20..40 (arange excludes 41)
explanatory_data = pd.DataFrame({'length_cm': np.arange(20, 41)})
print(mdl_mass_vs_length.predict(explanatory_data))

# Predicting inside a DataFrame
# Keeps each predicted mass_g next to the length_cm it was computed from.
# (Fixed: this was assigned to the misspelled name `predicton_data`, while the
# plot below reads `prediction_data`.)
prediction_data = explanatory_data.assign(mass_g=mdl_mass_vs_length.predict(explanatory_data))
print(prediction_data)

# Showing predictions
# Observed data with its trend line, plus the predictions as red squares
fig = plt.figure()
sns.regplot(x='length_cm', y='mass_g', ci=None, data=bream)
sns.scatterplot(x='length_cm', y='mass_g', data=prediction_data, color='red', marker='s')
plt.show()

# Extrapolating
# Predict for a length of 10, below the 20..40 grid used above.
# (Fixed: the model was referenced as the undefined name `ml_mass_vs_length`.)
little_bream = pd.DataFrame({'length_cm': [10]})
pred_little_bream = little_bream.assign(mass_g=mdl_mass_vs_length.predict(little_bream))
print(pred_little_bream)

In [None]:
# Inspecting a fitted model: coefficients, fitted values, residuals, summary
from statsmodels.formula.api import ols

mdl_mass_vs_length = ols(formula='mass_g ~ length_cm', data=bream).fit()
print(mdl_mass_vs_length.params)

# Fitted values, read directly from the .fittedvalues attribute
print(mdl_mass_vs_length.fittedvalues)
# Alternative shown for comparison: call .predict() on the explanatory
# column of the data the model was fitted to
explanatory_data = bream['length_cm']
print(mdl_mass_vs_length.predict(explanatory_data))

# Residuals, read directly from the .resid attribute
print(mdl_mass_vs_length.resid)
# Alternative shown for comparison: actual response minus fitted response
print(bream['mass_g'].sub(mdl_mass_vs_length.fittedvalues))

# Full summary; as the cell's last expression it is rendered as the cell output
mdl_mass_vs_length.summary()

In [None]:
# Regression to the mean
# All imports this cell needs are listed here so it does not depend on
# another cell having been run first.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from statsmodels.formula.api import ols

# Sons' heights against fathers' heights, with a green slope-1 reference line
# through (150, 150) marking "son exactly as tall as father".
fig = plt.figure()
sns.scatterplot(x='father_height_cm', y='son_height_cm', data=father_son)
plt.axline(xy1=(150, 150), slope=1, linewidth=2, color='green')
plt.axis('equal')   # one centimeter on the x-axis appears the same as one centimeter on the y-axis
plt.show()

# Adding regression line
# Same plot, with the fitted trend line in black for comparison with the green line
fig = plt.figure()
sns.regplot(x='father_height_cm', y='son_height_cm', data=father_son, ci=None, line_kws={'color': 'black'})
plt.axline(xy1=(150, 150), slope=1, linewidth=2, color='green')
plt.axis('equal')
plt.show()

# Running a regression
# (Fixed: the data argument was written `father.son`, an attribute access on an
# undefined name, instead of the `father_son` DataFrame plotted above.)
mdl_son_vs_father = ols('son_height_cm ~ father_height_cm', data=father_son).fit()
print(mdl_son_vs_father.params)

# Making predictions
# Both results are printed: a bare expression in the middle of a cell is
# evaluated but never displayed, so the first prediction used to be lost.
really_tall_father = pd.DataFrame({'father_height_cm': [190]})
print(mdl_son_vs_father.predict(really_tall_father))

really_short_father = pd.DataFrame({'father_height_cm': [150]})
print(mdl_son_vs_father.predict(really_short_father))

In [None]:
# Transforming variables
# All imports this cell needs are listed here so it does not depend on
# another cell having been run first.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.formula.api import ols

# Plotting mass vs. length cubed
# Adds the transformed column to `perch` in place; the model formula below refers to it by name.
perch['length_cm_cubed'] = perch['length_cm'] ** 3
sns.regplot(x='length_cm_cubed', y='mass_g', data=perch, ci=None)
plt.show()

# Fit mass against the cubed length.
# The coefficients are printed explicitly: a bare `mdl_perch.params` in the
# middle of a cell is evaluated but never displayed.
mdl_perch = ols('mass_g ~ length_cm_cubed', data=perch).fit()
print(mdl_perch.params)

# Prediction grid: lengths 10, 15, ..., 40 and their cubes. The untransformed
# length_cm is kept alongside so predictions can be plotted on the original scale.
explanatory_data = pd.DataFrame({'length_cm_cubed': np.arange(10, 41, 5) ** 3, 'length_cm': np.arange(10, 41, 5)})

prediction_data = explanatory_data.assign(mass_g=mdl_perch.predict(explanatory_data))
print(prediction_data)

# Predictions (red squares) on the transformed scale
fig = plt.figure()
sns.regplot(x='length_cm_cubed', y='mass_g', data=perch, ci=None)
sns.scatterplot(data=prediction_data, x='length_cm_cubed', y='mass_g', color='red', marker='s')
plt.show()

# Same predictions (red squares) on the original length scale
fig = plt.figure()
sns.regplot(x='length_cm', y='mass_g', data=perch, ci=None)
sns.scatterplot(data=prediction_data, y='mass_g', x='length_cm', color='red', marker='s')
plt.show()
