In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as pyo
import missingno as msno

In [None]:
# Switch plotly's offline module into notebook mode so figures can be
# rendered inside the notebook output cells.
pyo.init_notebook_mode()

In [2]:
# Load the cleaned UFO sightings table; the path is relative to the
# notebook's working directory.
DATA_PATH = 'cleaned_ufo.csv'
dataset = pd.read_csv(DATA_PATH)

In [3]:
# Remove, in place, every sighting whose year is the literal string "unknown".
unknown_year_labels = dataset.index[dataset["year"] == "unknown"]
dataset.drop(index=unknown_year_labels, inplace=True)

In [4]:
# Show the dtype pandas assigned to each column.
dataset.dtypes

location         object
country          object
shape            object
summary          object
year             object
month            object
day of month     object
time             object
time duration     int64
time unit        object
dtype: object

By default, pandas reads `int` columns as `float` and time values as `str`. This default cannot be changed, so the affected columns are converted explicitly in the next cell.

In [5]:
# Cast the numeric date parts to pandas' nullable integer dtype. The values go
# through float first, then to "Int64".
for int_col in ("year", "day of month"):
    dataset[int_col] = dataset[int_col].astype("float").astype("Int64")

# Parse the HH:MM:SS strings and keep only the time-of-day component.
parsed_time = pd.to_datetime(dataset["time"], format='%H:%M:%S')
dataset["time"] = parsed_time.dt.time

In [6]:
# Empty frame declaring the columns of the per-year shape tally.
shape = pd.DataFrame(columns=["count","shape", "year"])

In [7]:
# Distinct sighting years in ascending order. Missing years are skipped so the
# sort cannot fail on <NA>.
yearList = sorted(dataset["year"].dropna().unique())

# Tally every (year, shape) pair in a single grouped pass. This avoids growing
# the frame with DataFrame.append (removed in pandas 2.0) and does not depend
# on how value_counts() names its result, which differs between pandas 1.x
# and 2.x.
shape = (
    dataset.groupby(["year", "shape"])
    .size()
    .reset_index(name="count")
    # Years ascending; within a year the most frequent shapes come first.
    .sort_values(["year", "count"], ascending=[True, False])
    .reset_index(drop=True)[["count", "shape", "year"]]
)

# Use the nullable integer dtype for the counts.
shape["count"] = shape["count"].astype('Int64')

In [8]:
# Total number of sightings per shape across all years (indexed by shape).
shapeCount = shape.groupby("shape")[["count"]].sum()

In [9]:

# The title reports the earliest and latest year present in yearList.
first_year, last_year = min(yearList), max(yearList)
title_template = "Figure showing all the different ufo shapes spotted from {} till {}"
barTitle = title_template.format(first_year, last_year)

# One bar per shape; bar height is the total count for that shape.
data = [
    go.Bar(
        x=shapeCount.index,
        y=shapeCount["count"],
        width=1.01,
    )
]

fig = go.Figure(data=data, layout_title_text=barTitle)
fig.show()

In [13]:
# Count sightings for every (year, shape, country) combination; value_counts
# returns them in descending order of frequency.
combo_counts = dataset.value_counts(subset=["year", "shape", "country"])
shapeCountry = combo_counts.reset_index(name="count")

# Keep the first rows (head() defaults to 5) of each (country, year) group,
# i.e. its most frequent shapes given the descending order above.
shapeCountry = shapeCountry.groupby(["country", "year"]).head()

# Bounds of the retained counts, used for the y-axis range of the scatter plot.
minVal, maxVal = shapeCountry["count"].min(), shapeCountry["count"].max()

In [14]:

# Plotly Express builds animation frames in the order the frame values first
# appear in the data. shapeCountry comes from value_counts and is therefore
# ordered by count, not by year, so sort by year first to make the slider
# advance chronologically. mergesort is stable, so the count ordering inside
# each year is kept.
frames_by_year = shapeCountry.sort_values("year", kind="mergesort")

# Animated bubble chart: one frame per year, one bubble per retained
# (country, shape) row, sized by count and coloured by shape.
fig = px.scatter(
    frames_by_year,
    x = "year",
    y = "count",
    animation_frame="year",
    animation_group="country",
    size = "count",
    color= "shape",
    hover_name= "country",
    log_x=True,
    size_max= 50,
    range_y=[minVal, maxVal],
    range_x=[1970, 2020]
)

# Caption placed below the plot area; paper coordinates keep it fixed relative
# to the figure rather than to the data axes.
fig.add_annotation(
        text="Figure showing shape count by country over the years",
        x=0,
        y=-0.12,
        showarrow=False,
        textangle=0,
        xanchor='left',
        xref="paper",
        yref="paper"
        )
fig.show()