In [55]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

pio.templates.default = "plotly_white"

%matplotlib inline

# CHAPTER 2 - Statistical Learning

In [56]:
df = pd.read_csv('Data/Advertising.csv')

In [57]:
df

Unnamed: 0.1,Unnamed: 0,TV,radio,newspaper,sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9
...,...,...,...,...,...
195,196,38.2,3.7,13.8,7.6
196,197,94.2,4.9,8.1,9.7
197,198,177.0,9.3,6.4,12.8
198,199,283.6,42.0,66.2,25.5


In [58]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  200 non-null    int64  
 1   TV          200 non-null    float64
 2   radio       200 non-null    float64
 3   newspaper   200 non-null    float64
 4   sales       200 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 7.9 KB


In [59]:
template="plotly_dark"
figs = []
figs.append (px.scatter(df, x="radio", y="sales", trendline="ols", template=template) )
figs.append (px.scatter(df, x="newspaper", y="sales", trendline="ols", template=template) )
figs.append (px.scatter(df, x="TV", y="sales", trendline="ols", template=template) )
                

In [60]:
figs[0].show()

In [61]:
plots = make_subplots(rows=1, cols=3)


for i, f in enumerate(figs):

    f.update_traces(marker=dict(size=5, color="White",
                                  line=dict(width=1,color='Red')),
                                  selector=dict(mode='markers')
                  )
    plots.add_trace(f['data'][0], row=1, col=i+1)
    plots.add_trace(f['data'][1], row=1, col=i+1)


plots.show()

## Income - 1

In [63]:
df = pd.read_csv('Data/Income1.csv')

In [66]:
df

Unnamed: 0.1,Unnamed: 0,Education,Income
0,1,10.0,26.658839
1,2,10.401338,27.306435
2,3,10.842809,22.13241
3,4,11.244147,21.169841
4,5,11.645485,15.192634
5,6,12.086957,26.398951
6,7,12.488294,17.435307
7,8,12.889632,25.507885
8,9,13.29097,36.884595
9,10,13.732441,39.666109


In [65]:
px.scatter(df, x="Education", y="Income", trendline="lowess", template=template)


## Income - 2

In [67]:
df = pd.read_csv('Data/Income2.csv')
df

Unnamed: 0.1,Unnamed: 0,Education,Seniority,Income
0,1,21.586207,113.103448,99.917173
1,2,18.275862,119.310345,92.579135
2,3,12.068966,100.689655,34.678727
3,4,17.034483,187.586207,78.702806
4,5,19.931034,20.0,68.009922
5,6,18.275862,26.206897,71.504485
6,7,19.931034,150.344828,87.970467
7,8,21.172414,82.068966,79.81103
8,9,20.344828,88.275862,90.006327
9,10,10.0,113.103448,45.655529


In [70]:
px.scatter_3d(df, x='Education', z='Income', y='Seniority').show()