# UFO Sightings Redux

by **Mazza Luca**, **Giada Galdiolo** and **Vasco Silva Pereira**

## Dataset

[UFO Sightings Redux *by National UFO Reporting Center*](https://github.com/rfordatascience/tidytuesday/blob/main/data/2023/2023-06-20/readme.md)



In [None]:
# Load Dataset

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as ps
from plotly.subplots import make_subplots


# Main sightings table: read the CSV, attach a 1-based `id` index, then parse
# the three date columns. Values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
df = (
    pd.read_csv('./data/ufo_sightings.csv')
    .assign(id=lambda frame: range(1, len(frame) + 1))
    .set_index('id')
    .assign(
        # posted_date is parsed with an explicit year-month-day format.
        posted_date=lambda frame: pd.to_datetime(
            frame['posted_date'], format='%Y-%m-%d', errors='coerce'
        ),
        # The report timestamps are parsed with pandas' format inference.
        reported_date_time=lambda frame: pd.to_datetime(
            frame['reported_date_time'], errors='coerce'
        ),
        reported_date_time_utc=lambda frame: pd.to_datetime(
            frame['reported_date_time_utc'], errors='coerce'
        ),
    )
)

# Companion tables, loaded as-is.
df_dayparts = pd.read_csv('./data/day_parts_map.csv')
df_places = pd.read_csv('./data/places.csv')

In [None]:
# Print pandas' summary of `df` (DataFrame.info) as a sanity check of the loaded data.
df.info()

In [None]:
# Preview the first 100,000 rows of `df` as the cell output.
# NOTE(review): how many of those rows are actually rendered depends on the
# pandas display options — confirm whether such a large preview is intended.
df.head(100000)

In [None]:
# Number of sightings for each year of the `reported_date_time` column.
df_tmp = df.groupby(by=df['reported_date_time'].dt.year).size()

# Line chart of the yearly counts; the figure is the cell's displayed output.
(
    px.line(data_frame=df_tmp)
    .update_layout(
        title="Sightings per Year",
        xaxis_title="Reported Year",
        yaxis_title="Number of sightings",
        showlegend=False,
    )
)

In [None]:
# Histogram of `duration_seconds`, with the x-axis limited to [0, 1000000] and
# a logarithmic y-axis; the figure is the cell's displayed output.
(
    px.histogram(
        data_frame=df,
        x='duration_seconds',
        range_x=[0, 1000000],
    )
    .update_yaxes(type="log")
    .update_layout(
        title='Distribution of Duration',
        xaxis_title="Duration [s]",
        yaxis_title="Number of sightings",
        showlegend=False,
    )
)