In [None]:
# One-time bootstrap, kept commented out for reference: install the libraries,
# then record the installed package versions and the interpreter version.
# %pip install -q numpy pandas seaborn
# %pip freeze > requirements.txt
# !python --version > python_version.txt
# Regular path: quietly install everything listed in requirements.txt
# (presumably the file produced by the freeze step above — confirm).
%pip install -q -r requirements.txt

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

# Show the pandas and NumPy versions in use, one per line.
print(pd.__version__, np.__version__, sep="\n")

## Pandas Series

In [None]:
# Create a pandas Series of random 1s and 0s.
# A seeded generator makes the draw reproducible, so a fresh
# "Restart & Run All" shows the same values every time.
rng = np.random.default_rng(seed=42)
random_bits = pd.Series(rng.integers(0, 2, size=10))

# Convert the 0/1 draws into a boolean mask (True where the draw was 1).
random_series = random_bits == 1

# Indexing the mask with itself keeps only the entries that are True.
random_series[random_series]

In [None]:
# Build a labelled Series from a mapping: keys become the index, values the data.
series = pd.Series({'one': 1, 'two': 2, 'three': 3})

# Label-based lookup; plain bracket access (series['one']) is an alternative.
series.loc['one']

## NumPy Arrays

In [None]:
# Create a NumPy array (NumPy has arrays, not Series) holding 0, 10, ..., 50.
# np.arange builds the array directly, without wrapping a Python range.
my_series = np.arange(0, 51, 10)

print(my_series)

In [None]:
# Read the first 1,000 rows of the CSV export at this URL into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="https://data.wa.gov/api/views/f6w7-q2d2/rows.csv",
    nrows=1_000,
)

In [None]:
# Preview the first rows of the loaded DataFrame.
df.head()

## Indexing and Selecting Data

In [None]:
# Use the vehicle ID column as the row index so rows can be selected by label.
df_index = df.set_index('DOL Vehicle ID')

# selection by label
# The data is fetched from a remote URL and truncated to 1,000 rows, so this
# example ID is not guaranteed to be among the loaded rows. Fall back to the
# first available label rather than failing the cell with a KeyError.
example_vehicle_id = 235085336
if example_vehicle_id not in df_index.index:
    example_vehicle_id = df_index.index[0]
df_index.loc[example_vehicle_id]

## Reshaping

In [None]:
# Summarize the re-indexed frame (index, columns, non-null counts, dtypes).
df_index.info()

In [None]:
# Aggregate 'Base MSRP' (pivot_table's default aggregation) for each
# (Electric Vehicle Type, Make) row pair, with one column per State.
msrp_by_state = df_index.pivot_table(
    values='Base MSRP',
    index=['Electric Vehicle Type', 'Make'],
    columns='State',
)

# Move index level 1 into the columns and then stack column level 1 back
# into the index, using the new stack implementation (future_stack=True).
msrp_by_state.unstack(1).stack(1, future_stack=True)

## Data Quality Checks

In [None]:
# Column dtypes and non-null counts give a first look at data quality.
df.info()

### Number of missing values

In [None]:
# Count missing cells per column and keep only the columns that have any.
# The chained .loc avoids assigning to `_`, which IPython reserves for the
# previous cell's output.
missing = df.isna().sum().loc[lambda counts: counts > 0]
missing

### Static values

In [None]:
# Count distinct values per column and keep the columns with exactly one,
# i.e. columns whose value never varies in the loaded rows.
# The chained .loc avoids assigning to `_`, which IPython reserves for the
# previous cell's output.
static_vals = df.nunique().loc[lambda distinct: distinct == 1]
static_vals

### Rows with missing cells

In [None]:
# Boolean mask that is True for every row with at least one missing cell.
has_missing = df.isna().any(axis='columns')

# Keep just those rows and preview the first few.
rows_with_missing = df[has_missing]
rows_with_missing.head()

## Statistical Analysis

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Memory footprint of the column in bytes, with "_" as the thousands separator.
# Double quotes on the outside keep this valid before Python 3.12, where
# reusing the enclosing quote character inside an f-string is a SyntaxError.
f"{df['Postal Code'].memory_usage(deep=True):_}"

In [None]:
# Downcast the column to a 32-bit integer to reduce memory. The nullable
# "Int32" dtype keeps missing postal codes as <NA> instead of replacing them
# with a fabricated value of 0.
df = df.astype({'Postal Code': 'Int32'})

In [None]:
# Memory footprint of the column in bytes, with "_" as the thousands separator;
# compare with the value shown before the dtype change.
# Double quotes on the outside keep this valid before Python 3.12, where
# reusing the enclosing quote character inside an f-string is a SyntaxError.
f"{df['Postal Code'].memory_usage(deep=True):_}"

## Univariate Analysis

### Electric Range Distribution

In [None]:
# Keep only strictly positive ranges; zero values are left out of the plot
# (presumably they mark an unknown range — TODO confirm against the data source).
positive_range = df['Electric Range'] > 0
electric_range = df.loc[positive_range, 'Electric Range']

# Histogram with 50-unit-wide bins. The title lets the figure stand alone,
# and the trailing ";" suppresses the text repr of the return value.
ax = sns.histplot(electric_range, binwidth=50)
ax.set_title('Electric Range Distribution (range > 0)');