## DATA VISUALIZATION
#### Create a visualization that reads the dataset and shows the number of UFO sightings by country

In [1]:
#importing the required libraries
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets

In [2]:
# Access and read the UFO sightings CSV. header=None tells pandas the file has
# no header row, so the first record is kept as data and columns are numbered.
UFO_DATA_URL = "https://github.com/UIUC-iSchool-DataViz/is445_AOUAOG_fall2021/raw/master/week05/data/ufo-scrubbed-geocoded-time-standardized-00.csv"
df = pd.read_csv(UFO_DATA_URL, header=None)

#### Setup the appropriate column labels

In [3]:
# Peek at the first five rows to work out what each numbered column contains
# before giving the columns descriptive labels.
df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


In [4]:
# The preview above makes the content of each column clear, so assign
# descriptive labels, listed here in the same order as the columns in the file.
column_labels = [
    'Sighting Date',
    'City',
    'State Code',
    'Country Code',
    'Shape Observed',
    'Duration In Seconds',
    'Duration',
    'Description Provided',
    'Report Date',
    'Latitude',
    'Longitude',
]
df.columns = column_labels
df.head()

Unnamed: 0,Sighting Date,City,State Code,Country Code,Shape Observed,Duration In Seconds,Duration,Description Provided,Report Date,Latitude,Longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


#### Correct for Not-A-Number issues

In [5]:
# Count the missing (null/NaN) entries in each column.
df.isna().sum()

Sighting Date              0
City                       0
State Code              5797
Country Code            9670
Shape Observed          1932
Duration In Seconds        0
Duration                   0
Description Provided      15
Report Date                0
Latitude                   0
Longitude                  0
dtype: int64

In [6]:
# The null count above shows that only these four text columns have missing values.
# Fill them explicitly instead of calling fillna("unknown") on the whole frame:
# a blanket fill would also replace any NaN in a numeric column (duration,
# latitude, longitude) with a string and turn that column into object dtype.
TEXT_COLUMNS_WITH_NULLS = ['State Code', 'Country Code', 'Shape Observed', 'Description Provided']
# Rebind df rather than using inplace=True so that re-running this cell is harmless.
df = df.fillna({column: "unknown" for column in TEXT_COLUMNS_WITH_NULLS})
# Re-count the null values to confirm that they were all replaced successfully.
df.isnull().sum()

Sighting Date           0
City                    0
State Code              0
Country Code            0
Shape Observed          0
Duration In Seconds     0
Duration                0
Description Provided    0
Report Date             0
Latitude                0
Longitude               0
dtype: int64

In [7]:
# Preview the data again: entries that were NaN earlier now read 'unknown'.
df.head(5)

Unnamed: 0,Sighting Date,City,State Code,Country Code,Shape Observed,Duration In Seconds,Duration,Description Provided,Report Date,Latitude,Longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,unknown,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),unknown,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


In [8]:
# Summarize the data for plotting: tally the sightings per country code once,
# then keep the labels and their counts as two parallel lists.
sightings_by_country = df['Country Code'].value_counts()
country_values = sightings_by_country.index.tolist()
country_counts = sightings_by_country.tolist()

# Fill colors handed to ax.bar for the bars.
BAR_COLORS = ['cyan', 'magenta', 'yellow', 'black', 'red']

# Visualizing: an interactive dropdown of the available matplotlib styles
# re-renders the bar chart every time a different style is picked.
@ipywidgets.interact(style = plt.style.available)
def render_plot(style):
    """Draw the UFO-sightings-per-country bar chart in the given matplotlib style.

    Parameters
    ----------
    style : str
        Name of a matplotlib style sheet, one of ``plt.style.available``.
    """
    with plt.style.context(style):
        # plt.subplots() keeps matplotlib's default margins, so the tick labels,
        # axis labels and title are drawn inside the figure canvas. Axes created
        # with add_axes([0, 0, 1, 1]) fill the whole canvas and leave no room for them.
        fig, ax = plt.subplots()
        ax.bar(country_values, country_counts, color=BAR_COLORS)
        ax.set_title('UFO Sightings by Country')
        ax.set_ylabel('Number of UFO Sightings')
        ax.set_xlabel('Countries')
    plt.show()

interactive(children=(Dropdown(description='style', options=('Solarize_Light2', '_classic_test_patch', 'bmh', …