In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Import CSV dataset into Pandas DataFrame
# Location of the season 1 dataset inside the Kaggle input directory.
csv_path = "/kaggle/input/star-trek-season-1/Star_Trek-Season_1.csv"

# Load the file, using the episode_num column as the row index.
df = pd.read_csv(csv_path, index_col="episode_num")

In [None]:
# Column Names
# Display the labels of the DataFrame's columns. episode_num was passed as
# index_col when the CSV was read, so it is the index and is not listed here.
df.columns

In [None]:
# Data Types
# Display the dtype of every column. No explicit dtypes are passed to
# read_csv, so these are the types pandas inferred from the file.
df.dtypes

In [None]:
# Convert to Boolean where necessary
# Flag columns that should hold True/False values.
bool_cols = [
    "bool_aliens_almost_took_over_planet",
    "bool_aliens_almost_took_over_enterprise",
    "bool_hand_phasers_fired",
    "bool_ship_phasers_fired",
    "bool_ship_photon_torpedos_fired",
    "bool_enterprise_saved_the_day",
]

# Cast every flag column in a single astype call (column name -> target dtype).
df = df.astype({flag: "bool" for flag in bool_cols})

In [None]:
# Show DataFrame details
# info() prints a summary of the frame: index, column names, non-null counts,
# dtypes and memory usage.
# validate the cast from above: the columns listed in bool_cols should now be
# reported with dtype bool.
df.info()

In [None]:
# Preview the first five rows of the DataFrame
df.head(n=5)

In [None]:
# Season 1 had no non-zero values for these fields.
# Verify that claim with one True/False answer per column (True means the
# counter is 0 in every row) instead of printing a boolean for each row.
# Comparing the counters with 0 replaces the original `== False` comparison,
# which gives the same element-wise result but hides the intent.
quote_cols = ["cnt_damn_it_jim_quote", "cnt_im_givin_her_all_shes_got_quote"]
df[quote_cols].eq(0).all()

In [None]:
# Summary Statistics
# describe() is called with its defaults, which summarise the numeric columns
# (count, mean, std, min, quartiles, max).
df.describe()

In [None]:
# Correlation matrix and Heatmap

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# set the theme for Seaborn plots
sns.set_theme(style="darkgrid")

# exclude fields without usable values
df.drop(["season_num", "cnt_damn_it_jim_quote", "cnt_im_givin_her_all_shes_got_quote"],
        axis=1,
        inplace=True)

# compute correlation
corr = df.corr()

# plot the heatmap
plt.figure(figsize = (15,15))
sns.heatmap(
    corr,
    xticklabels=corr.columns,
    yticklabels=corr.columns,
    annot=True
)
plt.show()

In [None]:
# Histograms of all columns
# DataFrame.hist draws a grid with one subplot per column it can bin.
df.hist(figsize=(15, 15))
# Use a figure-level title: plt.title() only targets the current
# (last-created) subplot of the grid, so it would replace that subplot's own
# title instead of labelling the whole figure.
plt.suptitle("Histograms [All Columns]")
plt.show()

In [None]:
# https://seaborn.pydata.org/generated/seaborn.countplot.html#seaborn.countplot
# Count the rows for each value of the "aliens almost took over planet" flag.
planet_ax = sns.countplot(data=df, x="bool_aliens_almost_took_over_planet")
planet_ax.set_xlabel("Aliens (nearly) Take Over Planet")

plt.show()

In [None]:
# https://seaborn.pydata.org/generated/seaborn.countplot.html#seaborn.countplot
# Count the rows for each value of the "aliens almost took over enterprise" flag.
enterprise_ax = sns.countplot(data=df, x="bool_aliens_almost_took_over_enterprise")
enterprise_ax.set_xlabel("Aliens (nearly)  Take Over Enterprise")

plt.show()

In [None]:
# Time Series for "Kirk's Romances"
# Line plot of cnt_kirk_hookups against aired_date on a wide figure.
# NOTE(review): aired_date is loaded without date parsing, so it is presumably
# plotted as text labels rather than real dates - confirm the x-axis order is
# chronological.
fig, ax = plt.subplots(figsize=(30, 7))
sns.lineplot(data=df, x="aired_date", y="cnt_kirk_hookups", ax=ax)
# Rotate the x tick labels so that neighbouring labels do not overlap.
plt.xticks(rotation=80)
ax.set_title("Kirk's Time Series Romances")
plt.show()

In [None]:
# Time Series for "Downed Redshirts"
# Line plot of cnt_downed_redshirts against aired_date on a wide figure.
# NOTE(review): aired_date is loaded without date parsing, so it is presumably
# plotted as text labels rather than real dates - confirm the x-axis order is
# chronological.
fig, ax = plt.subplots(figsize=(30, 7))
sns.lineplot(data=df, x="aired_date", y="cnt_downed_redshirts", ax=ax)
# Rotate the x tick labels so that neighbouring labels do not overlap.
plt.xticks(rotation=80)
ax.set_title("Downed Red-shirts Time Series")
plt.show()