# Initial Data Analysis

## Importing the data

In [1]:
# Load the raw meteorite landings export into the data frame `df`.
# The first line of the file supplies the column names.

df <- read.csv(file = "data/meteorite_landings.csv", header = TRUE)

In [2]:
# Inspect the freshly loaded data frame: its dimensions (rows x columns),
# the type and leading values of every column, and per-column summaries.

dim(df)
str(df)
summary(df)

'data.frame':	45716 obs. of  10 variables:
 $ name       : chr  "Aachen" "Aarhus" "Abee" "Acapulco" ...
 $ id         : int  1 2 6 10 370 379 390 392 398 417 ...
 $ nametype   : chr  "Valid" "Valid" "Valid" "Valid" ...
 $ recclass   : chr  "L5" "H6" "EH4" "Acapulcoite" ...
 $ mass..g.   : num  21 720 107000 1914 780 ...
 $ fall       : chr  "Fell" "Fell" "Fell" "Fell" ...
 $ year       : chr  "01/01/1880 12:00:00 AM" "01/01/1951 12:00:00 AM" "01/01/1952 12:00:00 AM" "01/01/1976 12:00:00 AM" ...
 $ reclat     : num  50.8 56.2 54.2 16.9 -33.2 ...
 $ reclong    : num  6.08 10.23 -113 -99.9 -64.95 ...
 $ GeoLocation: chr  "(50.775, 6.08333)" "(56.18333, 10.23333)" "(54.21667, -113.0)" "(16.88333, -99.9)" ...


     name                 id          nametype           recclass        
 Length:45716       Min.   :    1   Length:45716       Length:45716      
 Class :character   1st Qu.:12689   Class :character   Class :character  
 Mode  :character   Median :24262   Mode  :character   Mode  :character  
                    Mean   :26890                                        
                    3rd Qu.:40657                                        
                    Max.   :57458                                        
                                                                         
    mass..g.            fall               year               reclat      
 Min.   :       0   Length:45716       Length:45716       Min.   :-87.37  
 1st Qu.:       7   Class :character   Class :character   1st Qu.:-76.71  
 Median :      33   Mode  :character   Mode  :character   Median :-71.50  
 Mean   :   13278                                         Mean   :-39.12  
 3rd Qu.:     203                

## Cleaning up the data

In [3]:
# Turn the `year` column from timestamp strings such as
# "01/01/1880 12:00:00 AM" into plain integer calendar years.

library("lubridate")

# Only the leading month/day/year part is matched by the format; the
# trailing time-of-day text is ignored when parsing. The calendar year is
# then pulled out of the parsed Date and stored back as an integer.
df[["year"]] <- as.integer(
  year(as.Date(df[["year"]], format = "%m/%d/%Y"))
)

head(df)


Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




Unnamed: 0_level_0,name,id,nametype,recclass,mass..g.,fall,year,reclat,reclong,GeoLocation
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>,<dbl>,<dbl>,<chr>
1,Aachen,1,Valid,L5,21,Fell,1880,50.775,6.08333,"(50.775, 6.08333)"
2,Aarhus,2,Valid,H6,720,Fell,1951,56.18333,10.23333,"(56.18333, 10.23333)"
3,Abee,6,Valid,EH4,107000,Fell,1952,54.21667,-113.0,"(54.21667, -113.0)"
4,Acapulco,10,Valid,Acapulcoite,1914,Fell,1976,16.88333,-99.9,"(16.88333, -99.9)"
5,Achiras,370,Valid,L6,780,Fell,1902,-33.16667,-64.95,"(-33.16667, -64.95)"
6,Adhi Kot,379,Valid,EH4,4239,Fell,1919,32.1,71.8,"(32.1, 71.8)"


In [4]:
# Give the awkward source column headers descriptive snake_case names.
# Each step swaps one old header for its new name and leaves the other
# headers untouched; a header that is not present is simply skipped.

names(df) <- replace(names(df), names(df) == "mass..g.", "mass_in_grams")
names(df) <- replace(names(df), names(df) == "fall", "fell_or_found")
names(df) <- replace(names(df), names(df) == "reclat", "latitude")
names(df) <- replace(names(df), names(df) == "reclong", "longitude")

head(df)




Unnamed: 0_level_0,name,id,nametype,recclass,mass_in_grams,fell_or_found,year,latitude,longitude,GeoLocation
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>,<dbl>,<dbl>,<chr>
1,Aachen,1,Valid,L5,21,Fell,1880,50.775,6.08333,"(50.775, 6.08333)"
2,Aarhus,2,Valid,H6,720,Fell,1951,56.18333,10.23333,"(56.18333, 10.23333)"
3,Abee,6,Valid,EH4,107000,Fell,1952,54.21667,-113.0,"(54.21667, -113.0)"
4,Acapulco,10,Valid,Acapulcoite,1914,Fell,1976,16.88333,-99.9,"(16.88333, -99.9)"
5,Achiras,370,Valid,L6,780,Fell,1902,-33.16667,-64.95,"(-33.16667, -64.95)"
6,Adhi Kot,379,Valid,EH4,4239,Fell,1919,32.1,71.8,"(32.1, 71.8)"


In [5]:
# Split the records by `nametype`: rows marked "Valid" are kept as
# meteorites, everything else is set aside as "meteorongs".

# Build the mask once so the two subsets are exact complements of each other.
# `%in%` never yields NA, so a row with a missing nametype is routed to
# `meteorongs` instead of silently disappearing from both data frames.
is_valid_name <- df$nametype %in% "Valid"

meteorites <- df[is_valid_name, ]
meteorongs <- df[!is_valid_name, ] # see what I did there?

head(meteorites, 5L)
head(meteorongs, 5L)

# Save the rejected rows for reference. row.names is left at its default, so
# the original row indices of `df` are written as the first column.
write.csv(meteorongs, "data/meteorongs.csv")

Unnamed: 0_level_0,name,id,nametype,recclass,mass_in_grams,fell_or_found,year,latitude,longitude,GeoLocation
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>,<dbl>,<dbl>,<chr>
1,Aachen,1,Valid,L5,21,Fell,1880,50.775,6.08333,"(50.775, 6.08333)"
2,Aarhus,2,Valid,H6,720,Fell,1951,56.18333,10.23333,"(56.18333, 10.23333)"
3,Abee,6,Valid,EH4,107000,Fell,1952,54.21667,-113.0,"(54.21667, -113.0)"
4,Acapulco,10,Valid,Acapulcoite,1914,Fell,1976,16.88333,-99.9,"(16.88333, -99.9)"
5,Achiras,370,Valid,L6,780,Fell,1902,-33.16667,-64.95,"(-33.16667, -64.95)"


Unnamed: 0_level_0,name,id,nametype,recclass,mass_in_grams,fell_or_found,year,latitude,longitude,GeoLocation
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>,<dbl>,<dbl>,<chr>
5220,Brunflo,5157,Relict,Relict H,,Found,1980,63.11667,14.28333,"(63.11667, 14.28333)"
6819,David Glacier 92308,6614,Relict,Chondrite-fusion crust,1.7,Found,1992,-75.31667,162.0,"(-75.31667, 162.0)"
12641,Gove,52859,Relict,Relict iron,0.0,Found,1979,-12.26333,136.83833,"(-12.26333, 136.83833)"
15948,Gullhögen 001,44889,Relict,Relict OC,,Found,2000,58.38333,13.8,"(58.38333, 13.8)"
17757,LaPaz Icefield 031037,35780,Relict,Fusion crust,0.1,Found,2003,,,


## Finished data frame

In [6]:
# Persist the cleaned, valid-only records to disk.
write.csv(x = meteorites, file = "data/meteorites_cleaned.csv")

# Display the finished data frame.
meteorites

Unnamed: 0_level_0,name,id,nametype,recclass,mass_in_grams,fell_or_found,year,latitude,longitude,GeoLocation
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>,<dbl>,<dbl>,<chr>
1,Aachen,1,Valid,L5,21,Fell,1880,50.77500,6.08333,"(50.775, 6.08333)"
2,Aarhus,2,Valid,H6,720,Fell,1951,56.18333,10.23333,"(56.18333, 10.23333)"
3,Abee,6,Valid,EH4,107000,Fell,1952,54.21667,-113.00000,"(54.21667, -113.0)"
4,Acapulco,10,Valid,Acapulcoite,1914,Fell,1976,16.88333,-99.90000,"(16.88333, -99.9)"
5,Achiras,370,Valid,L6,780,Fell,1902,-33.16667,-64.95000,"(-33.16667, -64.95)"
6,Adhi Kot,379,Valid,EH4,4239,Fell,1919,32.10000,71.80000,"(32.1, 71.8)"
7,Adzhi-Bogdo (stone),390,Valid,LL3-6,910,Fell,1949,44.83333,95.16667,"(44.83333, 95.16667)"
8,Agen,392,Valid,H5,30000,Fell,1814,44.21667,0.61667,"(44.21667, 0.61667)"
9,Aguada,398,Valid,L6,1620,Fell,1930,-31.60000,-65.23333,"(-31.6, -65.23333)"
10,Aguila Blanca,417,Valid,L,1440,Fell,1920,-30.86667,-64.55000,"(-30.86667, -64.55)"
