In [124]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import chardet
import calendar

In [125]:
def shapes_by_month(month_index, data=None):
    """
    Count sightings per shape for a single calendar month.

    Parameters
    ----------
    month_index : int
        Calendar month to summarise, from 1 (January) to 12 (December).
    data : pd.DataFrame, optional
        Sightings table with a datetime-typed "Date / Time" column and a
        "Shape" column. When omitted, the notebook-level ``ufo_filtered``
        frame is used, which is what this function has always read.

    Returns
    -------
    df : pd.DataFrame
        One-row frame. The row label is the month name, the columns (axis
        name "Shape") are the shapes seen in that month ordered from most
        to least frequent, and the values are integer sighting counts.
        Shapes with no sightings in the month get no column.

    Raises
    ------
    KeyError
        If ``month_index`` is not one of 1..12 (the same exception type the
        earlier dictionary-based lookup raised).

    Notes
    -----
    Month index starts at 1.
    """
    if data is None:
        # Fall back to the frame prepared by the data-loading cell.
        data = ufo_filtered

    if month_index not in range(1, 13):
        raise KeyError(f"month_index must be between 1 and 12, got {month_index!r}")

    # Only the requested month is filtered; the other eleven months are
    # never needed to answer a single-month query.
    in_month = data[data["Date / Time"].dt.month == month_index]
    # Rows are ordered chronologically before counting so that the counting
    # step sees them in the same order as it always has.
    in_month = in_month.sort_values(by="Date / Time", ascending=True)

    # value_counts already sorts by descending frequency; naming the axis
    # makes "Shape" the column-axis label after the transpose.
    shape_counts = in_month["Shape"].value_counts().rename_axis("Shape")

    # A single-dtype frame keeps its integer dtype through the transpose.
    month_df = shape_counts.to_frame().T
    month_df.index = [calendar.month_name[month_index]]
    return month_df

In [126]:
# Location of the raw sightings CSV.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
path = Path(r"C:\Users\dontb\01\001\Analysis\UFOs\UFOs_data.csv")

# Decode the file as Windows-1252 and trim whitespace around every column header.
ufo = pd.read_csv(path, encoding="Windows-1252").rename(columns=str.strip)
# 21 hex colours; passed as the palette for the per-shape bar plot and the
# location/shape scatter plot further down.
shape_colors = [
    "#364EA1", "#386EA2", "#3A8EA4", "#3CA59D", "#3FA680", "#41A865", 
    "#43A949", "#5CAB45", "#7BAC47", "#99AD4A", "#AEA64C", "#B08C4E",
    "#B17250", "#B25852", "#B4556A", "#B55787", "#B659A3", "#B05BB8",
    "#975EB9", "#7F60BA", "#6862BB"
]
# 60 hex colours; passed as the palette for the per-state bar plot further down.
state_colors = [
    "#534618", "#564219", "#593E1A", "#5C3A1B", "#5E351C", "#612F1D",
    "#632A1E", "#66241F", "#692021", "#6B202A", "#6E2133", "#71223C",
    "#732345", "#76244F", "#782559", "#7B2664", "#7E276E", "#80287A", 
    "#812983", "#7A2A85", "#732B88", "#6C2C8A", "#642D8D", "#5D2F8F",
    "#553092", "#4C3195", "#443297", "#3B339A", "#34369C", "#35429F", 
    "#364EA1", "#395CA3", "#3D69A5", "#4077A6", "#4384A8", "#4790AA",
    "#4A9DAC", "#4DA9AE", "#51AFAA", "#54B1A3", "#57B39B", "#5BB594",
    "#5EB68D", "#61B887", "#65BA80", "#68BB7B", "#6BBD75", "#6FBF70",
    "#79C172", "#84C276", "#8FC479", "#99C67C", "#A4C780", "#ADC983", 
    "#B7CB87", "#C0CC8A", "#C8CE8E", "#CFCE91", "#D1CA94", "#D3C698"
]
# Grab the raw columns before the coordinate columns are renamed below and
# before the timestamp column is converted later in this cell.
dates, lng, lat, cities = (
    ufo[column] for column in ("Date / Time", "lng", "lat", "City")
)
summaries = ufo["Summary"].copy()

# Sighting totals per shape, least frequent first.
shapes = (
    ufo["Shape"]
    .value_counts()
    .rename_axis("Shape")
    .reset_index(name="Count")
    .sort_values(by="Count", ascending=True)
)

# Sighting totals per state, least frequent first.
states = (
    ufo["State"]
    .value_counts()
    .rename_axis("State")
    .reset_index(name="Count")
    .sort_values(by="Count", ascending=True)
)

# Upper-case the coordinate column names on the main frame.
ufo = ufo.rename(columns={"lat": "LAT", "lng": "LNG"})

# Swap every literal "." in the raw timestamp text for ":" before parsing
# (presumably the file writes times like 21.30 - TODO confirm against the CSV).
# regex=False is essential: as a regular expression "." matches any
# character, so on pandas >= 2.0 regex=True would turn the whole string
# into colons and make the parse fail.
ufo["Date / Time"] = pd.to_datetime(
    ufo["Date / Time"].str.replace(".", ":", regex=False)
)

# Keep only rows whose country does not mention CANADA, then drop the
# coordinate, summary and country columns. na=False treats a missing country
# as "not Canada" instead of raising on a mask that contains NaN.
ufo_filtered = (
    ufo.loc[~ufo["Country"].str.contains("CANADA", na=False)]
    .drop(columns=["LAT", "LNG", "Summary", "Country"])
)

In [145]:
# One single-row frame of shape counts per calendar month, January to December.
dfs = [shapes_by_month(month_index) for month_index in range(1, 13)]

# Stack the monthly rows. A shape that never appears in a given month is NaN
# after the concat, so record it as 0 and store every count as an integer
# explicitly rather than relying on implicit dtype inference.
shapes_by_month_data = (
    pd.concat(dfs, axis=0, ignore_index=False).fillna(0).astype(int)
)
shapes_by_month_data

Shape,Light,Circle,Other,Fireball,Triangle,Unknown,Sphere,Disk,Formation,Oval,...,Flash,Rectangle,Diamond,Cylinder,Cigar,Chevron,Teardrop,Cross,Cone,Egg
January,94,54,39,38,34,32,27,21,20,19,...,10,9,8,8,6,6,4,2,2,1
February,98,46,16,36,48,39,25,8,16,7,...,21,5,1,7,9,4,4,0,5,1
March,74,49,25,44,43,32,21,26,19,19,...,15,8,5,3,7,2,4,2,0,2
April,76,72,21,42,36,35,35,24,19,19,...,6,9,4,5,11,5,4,1,2,5
May,80,52,23,30,38,27,25,14,21,15,...,8,7,4,3,7,6,1,4,0,1
June,73,72,18,28,30,34,27,7,10,20,...,11,12,7,14,9,5,2,0,2,2
July,104,65,40,72,49,47,30,17,16,13,...,13,12,0,7,8,5,8,2,3,2
August,100,58,29,30,41,36,26,17,12,21,...,11,14,3,2,10,5,2,1,0,5
September,96,52,34,29,56,52,24,18,16,20,...,15,11,11,6,11,7,3,2,1,5
October,111,59,32,30,46,33,32,19,20,12,...,3,6,13,3,7,3,11,3,5,1


In [None]:
# Bar plot of UFO sightings by state

# sns.set resets the global colour cycle to seaborn's default, so a preceding
# sns.set_palette call has no effect; the palette is handed to barplot directly.
sns.set(font_scale=0.7, style="ticks")

# Draw on an explicit Axes instead of relying on the implicit current one.
fig, ax = plt.subplots(figsize=(16, 9))
chart = sns.barplot(
    data=states,
    x="State",
    y="Count",
    palette=state_colors,
    edgecolor="black",
    ax=ax,
)

# Annotate every bar with its count.
for container in chart.containers:
    chart.bar_label(container)

ax.set_xlabel("State", fontsize=10)
ax.set_ylabel("Number of Sightings", fontsize=16)
ax.set_title("Total UFO Sightings by State (2016)\n", fontsize=15)

# Render the figure without echoing the Text(...) repr of the last call.
plt.show()

In [None]:
# Bar plot of UFO sightings by shape

# sns.set resets the global colour cycle to seaborn's default, so a preceding
# sns.set_palette call has no effect; the palette is handed to barplot directly.
sns.set(font_scale=0.7, style="ticks")

# Draw on an explicit Axes instead of relying on the implicit current one.
fig, ax = plt.subplots(figsize=(16, 9))
chart = sns.barplot(
    data=shapes,
    x="Shape",
    y="Count",
    palette=shape_colors,
    edgecolor="black",
    ax=ax,
)

# Annotate every bar with its count.
for container in chart.containers:
    chart.bar_label(container)

ax.set_xlabel("UFO Shape", fontsize=10)
ax.set_ylabel("Number of Sightings", fontsize=16)
ax.set_title("Total UFO Sightings by Shape (2016)\n", fontsize=15)

# Render the figure without echoing the Text(...) repr of the last call.
plt.show()

In [None]:
# Hexbin plot of UFO sightings

# Bin the raw longitude/latitude pairs into a 15-wide hexagonal grid.
fig, ax = plt.subplots()
hexes = ax.hexbin(lng, lat, gridsize=15, cmap="BuPu")

# Colour scale for the hexagons.
fig.colorbar(hexes, ax=ax, label="Density")

ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.set_title("Hexbin Plot of UFO Sightings (2016)")
ax.grid(True, alpha=0.4)
plt.show()

In [None]:
# Scatter plot of UFO sightings by location and shape

# Make the shape palette the global colour cycle as well as the hue palette.
sns.set_palette(shape_colors)

fig, ax = plt.subplots(figsize=(16, 9))
sns.scatterplot(
    data=ufo,
    x="LNG",
    y="LAT",
    hue="Shape",
    palette=shape_colors,
    legend="auto",
    ax=ax,
)
ax.set_title("UFO Sightings by Location and Shape (2016)\n", fontsize=20)
ax.set_xlabel("LNG", fontsize=14)
ax.set_ylabel("LAT", fontsize=14)
ax.grid(True, alpha=0.4)