# **Unidentified Flying Analytics:**
---
### Project Overview

From the Kaggle dataset description: What areas of the country are most likely to have UFO sightings? Are there any trends in UFO sightings over time? Do they tend to be clustered or seasonal? Do clusters of UFO sightings correlate with landmarks, such as airports or government research centers? What are the most common UFO descriptions?

---
### Dataset

https://www.kaggle.com/NUFORC/ufo-sightings

In [1]:
import pandas as pd
import requests
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import *
import zipfile
import geopandas as gpd
import glob

In [8]:
# Load the scrubbed NUFORC sightings CSV into a DataFrame and preview the first rows.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook only
# runs where this exact file exists. Consider a path relative to the notebook.
file_path = 'C:\\Users\\12039\\Documents\\UFO_sightings\\scrubbed.csv'
# low_memory=False makes pandas infer each column's dtype from the whole file in one
# pass rather than chunk by chunk. The saved output of this cell shows a warning
# pointing at this call (presumably the mixed-type DtypeWarning, since numeric-looking
# columns such as 'duration (seconds)' and 'latitude' come back as object).
ufo_data = pd.read_csv(file_path, low_memory=False)
ufo_data.head()

  ufo_data = pd.read_csv(file_path)


Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611


In [9]:
# Inspect the dtype pandas assigned to each column right after loading.
ufo_data.dtypes

datetime                 object
city                     object
state                    object
country                  object
shape                    object
duration (seconds)       object
duration (hours/min)     object
comments                 object
date posted              object
latitude                 object
longitude               float64
dtype: object

In [10]:
# Inspection step: collect every 'datetime' entry written with the hour '24:00'.
# Nothing is replaced here; the values are only selected and printed.
has_24_hour = ufo_data['datetime'].str.contains('24:00')
problematic_dates = ufo_data.loc[has_24_hour, 'datetime']
print(problematic_dates)

388      10/11/2006 24:00
693       10/1/2001 24:00
962       10/1/2012 24:00
1067     10/12/2003 24:00
1221     10/12/2013 24:00
               ...       
79136      9/4/2005 24:00
79137      9/4/2005 24:00
79740      9/7/2002 24:00
79759      9/7/2004 24:00
79942      9/8/2003 24:00
Name: datetime, Length: 694, dtype: object


In [11]:
# Convert the 'datetime' column from strings to datetime64.
#
# '24:00' is not a clock time pd.to_datetime accepts. It denotes midnight at the END
# of the stated day, i.e. 00:00 of the FOLLOWING day. Simply rewriting it to '00:00'
# would place those sightings 24 hours too early, so the affected rows are parsed as
# 00:00 and then shifted forward by one day.
#
# The dtype guard makes the cell safe to re-run: once the column is datetime64 the
# .str accessor is no longer available and the conversion is already done.
if not pd.api.types.is_datetime64_any_dtype(ufo_data['datetime']):
    datetime_raw = ufo_data['datetime']
    # regex=False: '24:00' is matched as a literal substring; na=False keeps the mask boolean.
    ends_at_24 = datetime_raw.str.contains('24:00', regex=False, na=False)
    datetime_parsed = pd.to_datetime(
        datetime_raw.str.replace('24:00', '00:00', regex=False),
        errors='coerce',  # unparseable strings become NaT instead of raising
    )
    # Keep ordinary rows as parsed; move former '24:00' rows to the next calendar day.
    ufo_data['datetime'] = datetime_parsed.where(
        ~ends_at_24, datetime_parsed + pd.Timedelta(days=1)
    )

In [14]:
# Parse the 'date posted' strings into datetime64 values (raises on unparseable input).
ufo_data['date posted'] = ufo_data['date posted'].pipe(pd.to_datetime)

In [19]:
# Coerce the raw duration values to numbers; entries that cannot be parsed become NaN.
ufo_data['duration (seconds)'] = ufo_data['duration (seconds)'].pipe(
    pd.to_numeric, errors='coerce'
)

In [20]:
# Derive the sighting duration in minutes from the duration in seconds.
ufo_data['duration(minutes)'] = ufo_data['duration (seconds)'].div(60)

In [22]:
# Coerce latitude to numbers; entries that cannot be parsed become NaN.
ufo_data['latitude'] = ufo_data['latitude'].pipe(pd.to_numeric, errors='coerce')

In [27]:
# Re-check the column dtypes after the datetime / numeric conversions.
ufo_data.dtypes

datetime                datetime64[ns]
city                            object
state                           object
country                         object
shape                           object
duration (seconds)             float64
duration (hours/min)            object
comments                        object
date posted             datetime64[ns]
latitude                       float64
longitude                      float64
duration(minutes)              float64
dtype: object

In [18]:
# List the distinct values recorded in the 'shape' column (NaN included if present).
ufo_data['shape'].unique()

array(['cylinder', 'light', 'circle', 'sphere', 'disk', 'fireball',
       'unknown', 'oval', 'other', 'cigar', 'rectangle', 'chevron',
       'triangle', 'formation', nan, 'delta', 'changing', 'egg',
       'diamond', 'flash', 'teardrop', 'cone', 'cross', 'pyramid',
       'round', 'crescent', 'flare', 'hexagon', 'dome', 'changed'],
      dtype=object)

In [24]:
# List the distinct country codes present in the data (NaN included if present).
ufo_data['country'].unique()

array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)

In [25]:
# List the distinct values recorded in the 'city' column.
ufo_data['city'].unique()

array(['san marcos', 'lackland afb', 'chester (uk/england)', ...,
       'calmar (canada)', 'aleksandrow (poland)', 'hamstead (hollyridge)'],
      dtype=object)

In [26]:
# List the distinct state/province codes present in the data (NaN included if present).
ufo_data['state'].unique()

array(['tx', nan, 'hi', 'tn', 'ct', 'al', 'fl', 'ca', 'nc', 'ny', 'ky',
       'mi', 'ma', 'ks', 'sc', 'wa', 'ab', 'co', 'nh', 'wi', 'me', 'ga',
       'pa', 'il', 'ar', 'on', 'mo', 'oh', 'in', 'az', 'mn', 'nv', 'nf',
       'ne', 'or', 'bc', 'ia', 'va', 'id', 'nm', 'nj', 'mb', 'wv', 'ok',
       'ri', 'nb', 'vt', 'la', 'pr', 'ak', 'ms', 'ut', 'md', 'mt', 'sk',
       'wy', 'sd', 'pq', 'ns', 'qc', 'de', 'nd', 'dc', 'nt', 'sa', 'yt',
       'yk', 'pe'], dtype=object)

In [29]:
# Keep only rows explicitly tagged with country 'us', then count sightings per state.
# NOTE(review): rows whose country is missing are excluded even when the state is a
# US one (e.g. the 'lackland afb', 'tx' row in the preview has no country).
us_sightings = ufo_data.loc[ufo_data['country'].eq('us')]
state_counts = (
    us_sightings
    .groupby('state')
    .size()
    .reset_index(name='sightings_count')
)
print(state_counts)

   state  sightings_count
0     ak              319
1     al              642
2     ar              588
3     az             2414
4     ca             8912
5     co             1413
6     ct              892
7     dc                7
8     de              166
9     fl             3835
10    ga             1255
11    hi              262
12    ia              678
13    id              521
14    il             2499
15    in             1288
16    ks              613
17    ky              855
18    la              558
19    ma             1256
20    md              837
21    me              558
22    mi             1836
23    mn             1012
24    mo             1458
25    ms              375
26    mt              478
27    nc             1740
28    nd              129
29    ne              381
30    nh              486
31    nj             1255
32    nm              720
33    nv              803
34    ny             2980
35    oh             2275
36    ok              724
37    or    