In [None]:
# Import Dependencies
import pandas as pd

In [None]:
# Relative path to the CSV file holding the UFO sightings data
csv_path = "Resources/ufoSightings.csv"

# Load the file's contents into a pandas DataFrame
ufo_df = pd.read_csv(filepath_or_buffer=csv_path)

# Preview the first five rows as this cell's output
ufo_df.head(n=5)

In [None]:
# Keep only the rows that have a value in every column
clean_ufo_df = ufo_df.dropna(axis=0, how="any")

# Per-column tally of non-null entries in the cleaned frame
clean_ufo_df.count()

In [None]:
# Preview the first five rows of the cleaned data
clean_ufo_df.head(n=5)

In [None]:
# Work on a copy so that clean_ufo_df itself is left unmodified
converted_ufo_df = clean_ufo_df.copy()

# Cast the "duration (seconds)" values to floats so they can be used in arithmetic
converted_ufo_df["duration (seconds)"] = (
    converted_ufo_df["duration (seconds)"].astype("float64")
)

In [None]:
# Preview the first five rows after the dtype conversion
converted_ufo_df.head(n=5)

In [None]:
# Select only the rows whose "country" value is "us" (all columns are kept)
usa_ufo_df = converted_ufo_df.loc[converted_ufo_df["country"].eq("us")]

# Preview the first five US sightings
usa_ufo_df.head(n=5)

In [None]:
# Count how many sightings have occurred within each state;
# the result is a Series indexed by the "state" values
state_counts = usa_ufo_df.loc[:, "state"].value_counts()

# Preview the first five entries
state_counts.head(n=5)

In [None]:
# Split the US sightings into groups keyed by their "state" value
grouped_usa_df = usa_ufo_df.groupby(by=["state"])

# Printing a "GroupBy" object shows only the object itself, not the grouped data...
print(grouped_usa_df)

# ...so an aggregation (here a per-column count) is applied to get something viewable
grouped_usa_df.count().head(n=10)

In [None]:
# Total of the "duration (seconds)" values within each state group
grouped_usa_df["duration (seconds)"].agg("sum")

In [None]:
# "duration (seconds)" now holds numeric values, so it can be totalled for each state
state_duration = grouped_usa_df["duration (seconds)"].agg("sum")

# Preview the first five per-state totals
state_duration.head(n=5)

In [None]:
# Combine the per-state sighting counts and the per-state duration totals
# into one DataFrame; both Series are indexed by state, so pandas lines
# the values up by that shared index
state_summary_df = pd.DataFrame(
    data={
        "Number of Sightings": state_counts,
        "Total Visit Time": state_duration,
    }
)

# Preview the first five rows of the summary
state_summary_df.head(n=5)

In [None]:
# A DataFrame can also be grouped by several columns at once.
# The aggregated result then carries a multi-level index
# (one level per grouping column), which can be harder to work with.
grouped_international_data = converted_ufo_df.groupby(by=["country", "state"])

# Per-column counts for the first twenty (country, state) groups
grouped_international_data.count().head(n=20)

In [None]:
# Aggregate the grouped "duration (seconds)" values and turn the
# resulting Series into a single-column DataFrame
international_duration_df = (
    grouped_international_data["duration (seconds)"]
    .sum()
    .to_frame()
)

# Preview the first ten rows
international_duration_df.head(n=10)