### Step 0: Imports and Reading Data

In [None]:
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns

# Raise the Polars table-display limits so notebook output is not truncated:
# show up to 200 columns and, per the Polars Config docs (a negative value
# means "no limit"), every row of a printed frame.
pl.Config.set_tbl_cols(200)
pl.Config.set_tbl_rows(-1)

In [None]:
# Load the raw dataset. The path is relative, so the CSV must sit in the
# notebook's working directory.
df = pl.read_csv("coaster_db.csv")

### Step 1: Data Understanding
* Dataframe shape
* *head* and *tail* 
* dtypes, columns
* describe

In [None]:
# Dimensions of the raw frame as a (rows, columns) tuple.
df.shape

In [None]:
# Preview the first five rows of the raw frame.
df.head(5)

In [None]:
# Print every column name on one line, separated by ", ".
print(*df.columns, sep=", ")

In [None]:
# Two-column overview of the schema: each column name next to its dtype.
pl.DataFrame(
    {
        "column": list(df.schema.keys()),
        "dtype": list(df.schema.values()),
    }
)

In [None]:
# Summary statistics for the columns of the raw frame.
df.describe()

### Step 2: Data Preparation
* Dropping irrelevant columns and rows
* Identifying duplicated columns
* Renaming Columns
* Feature Creation

In [15]:
# Narrow the raw frame down to the columns listed below, in this order, and
# keep the result as a separate frame named `coaster`.
coaster = df.select(
    [
        "coaster_name",
        "Location",
        "Status",
        "Manufacturer",
        "year_introduced",
        "latitude",
        "longitude",
        "Type_Main",
        "opening_date_clean",
        "speed_mph",
        "height_ft",
        "Inversions_clean",
        "Gforce_clean",
    ]
).clone()

In [None]:
# Summary statistics for the trimmed-down frame. `describe` is a method, so it
# has to be called; a bare `coaster.describe` only echoes the bound-method
# repr instead of computing anything.
coaster.describe()

In [27]:
import plotly.express as px

# World map with one marker per row of `coaster`, coloured by the
# "Type_Main" column.
fig = px.scatter_geo(
    coaster,
    # Figure-level presentation.
    title="🎢 Roller Coasters Around the World by Type",
    template="plotly_white",
    projection="natural earth",
    # Marker placement and colour grouping.
    lat="latitude",
    lon="longitude",
    color="Type_Main",
    # Tooltip: headline with the coaster name, leave out the raw coordinates.
    hover_name="coaster_name",
    hover_data={"latitude": False, "longitude": False},
)

In [29]:
# Marker appearance: small, slightly transparent dots with a thin white outline.
fig.update_traces(
    marker={
        "size": 6,
        "opacity": 0.7,
        "line": {"width": 0.5, "color": "white"},
    }
)

# Layout: centred title, a readable legend heading, and explicit colours for
# land, country borders and ocean on the base map.
fig.update_layout(
    title_x=0.5,
    legend_title="Coaster Type",
    geo={
        "showland": True,
        "landcolor": "lightgray",
        "showcountries": True,
        "countrycolor": "white",
        "showocean": True,
        "oceancolor": "lightblue",
    },
)