In [None]:
import urllib

import statsmodels.api as sm
import sklearn.datasets as ds
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf

from pandas.plotting import scatter_matrix

In [None]:
# Loading data

def get_iris_df():
  """Return the iris measurements as a DataFrame with a `species` column.

  The feature columns are named after the dataset's `feature_names`;
  `species` holds, for every row, the `target_names` entry selected by
  that row's class code in `target`.
  """
  bunch = ds.load_iris()
  species_names = bunch['target_names']

  frame = pd.DataFrame(bunch['data'], columns=bunch['feature_names'])

  # Translate each class code into its species label.
  frame['species'] = [species_names[code] for code in bunch['target']]

  return frame

# Module-level iris frame read by the plotting cells that follow.
df = get_iris_df()

# Print the frame's column summary as a quick sanity check of the load.
df.info()

In [None]:
# Pie chart: each species' share of one summed measurement
var = 'sepal width (cm)'
sums_by_species = df.groupby('species').sum()

share_ax = sums_by_species[var].plot.pie(fontsize=20)
share_ax.set_title(f'Breakdown for {var}', fontdict={"fontsize": 25})
plt.show()

In [None]:
# One pie per measurement column, each split by species.
sums_by_species = df.groupby('species').sum()
sums_by_species.plot(
    kind='pie',
    subplots=True,
    figsize=(16, 4),  # pies are laid out in a single row; default size crowds them
    legend=False,     # wedge labels already name the species on every pie
)
# With subplots=True the figure holds several axes. plt.title() would label
# only the last axes drawn, so the heading is placed on the figure instead
# (the bar-chart cell uses suptitle for the same heading).
plt.suptitle('Total Measurements, by Species')
plt.show()

In [None]:
# Bar charts: one measurement saved to disk, then every measurement on screen

var = 'sepal width (cm)'
sums_by_species = df.groupby('species').sum()

# The single-variable chart is written to a JPEG and closed without being shown.
single_ax = sums_by_species[var].plot.bar(fontsize=15, rot=30)
single_ax.set_title(f'Breakdown for {var}', fontsize=20)
single_ax.figure.savefig('iris_bar_for_one_variable.jpg')
plt.close(single_ax.figure)

# One bar subplot per column of the summed frame, under a figure-level heading.
sums_by_species.plot.bar(subplots=True, fontsize=12)
plt.suptitle('Total Measurements, by Species')
plt.show()

In [None]:
# Histograms

# Histogram panels for the iris frame, arranged on a 2x2 grid.
df.plot.hist(subplots=True, layout=(2, 2))
plt.suptitle('Iris Histograms', fontsize=20)
plt.show()

# Overlay the petal-length histogram of each species in one chart; partial
# transparency keeps overlapping bars visible.
for spec in df['species'].unique():
  petal_lengths = df.loc[df['species'].eq(spec), 'petal length (cm)']
  petal_lengths.plot.hist(alpha=0.4, label=spec)
plt.legend(loc='upper right')
plt.suptitle('Petal Length by Species')
plt.show()


In [None]:
# Means, quantiles, etc.

col = df['petal length (cm)']

# Whole-column summary statistics.
Average = col.mean()
Std = col.std()
Percentile25, Median, Percentile75 = (
    col.quantile(q) for q in (0.25, 0.5, 0.75)
)

# Mean restricted to values strictly between the 25th and 75th percentiles.
Perc25, Perc75 = Percentile25, Percentile75
inside_iqr = col.gt(Perc25) & col.lt(Perc75)
Clean_Avg = col[inside_iqr].mean()

In [None]:
# Boxplots

col = 'sepal length (cm)'

# Number the rows within each species so the frame can be pivoted to one
# column per species. cumcount() replaces the previous `index % 50`, which
# assumed exactly 50 contiguous rows per species. assign() works on a copy,
# so the shared `df` no longer gains a helper column `ind` that would show up
# in other cells' groupby sums and histogram grids when they are re-run.
by_species = (
    df.assign(ind=df.groupby('species').cumcount())
      .pivot(index='ind', columns='species', values=col)
)

by_species.plot(kind='box')
plt.title('Distribution of ' + col + ', by species')
plt.ylabel(col)
plt.show()

In [None]:
# Scatterplots

# All rows in one style: sepal length against sepal width.
df.plot.scatter(x="sepal length (cm)", y="sepal width (cm)")
plt.title("Length vs Width")
plt.show()

plt.close()

# Same two measurements with the axes swapped, drawn on one shared axes with
# a distinct colour, marker and marker size per species.
colors = ["r", "g", "b"]
markers = [".", "*", "^"]
fig, ax = plt.subplots()
for i, spec in enumerate(df['species'].unique()):
  ddf = df.loc[df['species'].eq(spec)]
  ddf.plot.scatter(
    x="sepal width (cm)",
    y="sepal length (cm)",
    ax=ax,
    s=10 * (i + 1),  # marker size grows with the species' position in the loop
    alpha=0.5,
    color=colors[i],
    marker=markers[i],
    label=spec,
  )
ax.legend()
plt.show()

In [None]:
# Scatterplots with logs

# Fetch the California housing data and combine features and target into one
# frame. This rebinds `df`, replacing the iris frame used by earlier cells.
bs = ds.fetch_california_housing()

df = pd.DataFrame(bs.data, columns=bs.feature_names).assign(Target=bs.target)
df.info()

In [None]:
# Draw AveRooms on a log-scaled x axis so the plot matches its title; the
# previous call omitted logx and produced a linear axis.
df.plot(x='AveRooms', y='Target', kind='scatter', logx=True)
plt.title('AveRooms on logarithmic axis')
plt.show()

# Grid of pairwise scatterplots across the columns of the frame.
scatter_matrix(df)
plt.show()

In [None]:
# Heatmaps

# Rebuild the iris frame, since `df` was rebound to another dataset earlier.
df = get_iris_df()

# Hexagonal-bin plot of sepal width against sepal length.
df.plot.hexbin(x="sepal width (cm)", y="sepal length (cm)")
plt.show()

In [None]:
# CO2 time series

# Load the CO2 dataset bundled with statsmodels as a pandas object and plot it.
dta = sm.datasets.co2.load_pandas().data
co2_ax = dta.plot()
co2_ax.set_title("CO2 Levels")
co2_ax.set_ylabel("Parts per million")
plt.show()

In [None]:
# Stock price

# Request the maximum available daily history for GOOG through yfinance.
yf_ticker = yf.Ticker("GOOG")
df = yf_ticker.history(period="max", interval="1d")
df['LogClose'] = np.log(df['Close'])

# Plot the same closing-price series twice: linear y axis, then logarithmic.
close = df['Close']
for use_log, heading in ((False, "Normal Axis"), (True, "Logarithmic Axis")):
  close.plot(logy=use_log)
  plt.title(heading)
  plt.show()