In [None]:
# Import Dependencies
import pandas as pd

In [None]:
# Relative location of the UFO sightings CSV file
csv_path = "Resources/ufoSightings.csv"

# Load the CSV into a Pandas DataFrame; low_memory=False makes pandas parse
# the file in a single pass rather than in chunks
ufo_df = pd.read_csv(filepath_or_buffer=csv_path, low_memory=False)

# Preview the first five rows of the loaded data
ufo_df.head(5)

In [None]:
# Drop every row that has at least one missing value, then show how many
# non-null entries remain in each column
clean_ufo_df = ufo_df.dropna(axis="index", how="any")
clean_ufo_df.count()

In [None]:
# Convert the "duration (seconds)" column's values to numeric.
#
# clean_ufo_df is the output of dropna(), so writing into it with
# clean_ufo_df[...] = ... may trigger pandas' SettingWithCopyWarning.
# .assign() returns a new DataFrame with the converted column in the same
# position, and the name is rebound to it. The column name contains spaces
# and parentheses, so it is passed through a dict rather than as a keyword.
# Re-running this cell is safe: pd.to_numeric leaves numeric data unchanged.
clean_ufo_df = clean_ufo_df.assign(
    **{"duration (seconds)": pd.to_numeric(clean_ufo_df["duration (seconds)"])})

In [None]:
# Keep only the sightings whose "country" value is "us"
is_us_sighting = clean_ufo_df["country"] == "us"
usa_ufo_df = clean_ufo_df.loc[is_us_sighting]

# Preview the US-only DataFrame
usa_ufo_df.head()

In [None]:
# Count how many sightings have occurred within each state
us_states = usa_ufo_df["state"]
state_counts = us_states.value_counts()
state_counts.head()

In [None]:
# ---------------------------------------------------------------------------
# df.groupby([Column]) splits the DataFrame into groups; here every distinct
# "state" value within the US data becomes its own group.
# ---------------------------------------------------------------------------

# Group the US sightings by their "state" values
grouped_usa_df = usa_ufo_df.groupby(by=["state"])

# ---------------------------------------------------------------------------
# .groupby() returns a GroupBy object rather than a DataFrame, so printing it
# only shows the object's description, not the grouped rows. To see the
# grouped data as a table, apply a data function (such as .count()) to it.
# ---------------------------------------------------------------------------
print(grouped_usa_df)
grouped_usa_df.count().head(10)

In [None]:
# "duration (seconds)" was converted to numeric values earlier,
# so it can be totalled for each state
duration_by_state = grouped_usa_df["duration (seconds)"]
state_duration = duration_by_state.sum()
state_duration.head()


In [None]:
# ---------------------------------------------------------------------------
# New DataFrames can be built purely from GroupBy results. This can be done
# by passing the desired data to pd.DataFrame() as a dict that maps each
# column label to its values.
# ---------------------------------------------------------------------------

# Combine the per-state sighting counts and total durations into one table
summary_columns = {
    "Number of Sightings": state_counts,
    "Total Visit Time": state_duration,
}
state_summary_table = pd.DataFrame(summary_columns)
state_summary_table.head()

In [None]:
# ---------------------------------------------------------------------------
# df.groupby() can also group on several columns at once: pass >= 2 column
# references in the list parameter.
#
# The result is indexed by multiple levels, which can increase complexity
# ---------------------------------------------------------------------------

# Group every cleaned sighting by country first, then by state
grouped_international_data = clean_ufo_df.groupby(by=["country", "state"])
grouped_international_data.count().head(20)

In [None]:
# ---------------------------------------------------------------------------
# The result of aggregating a GroupBy object can be turned into a DataFrame
# ---------------------------------------------------------------------------
# Sum the durations per (country, state) group, then convert the resulting
# Series into a single-column DataFrame
duration_totals = grouped_international_data["duration (seconds)"].sum()
international_duration = duration_totals.to_frame()

international_duration.head(10)