In [None]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import re

# Accessing API data

Link to the landslide data (NASA Global Landslide Catalog): https://data.nasa.gov/Earth-Science/Global-Landslide-Catalog-Export/dd9e-wu2v

Request a NASA API key here: https://api.nasa.gov/index.html#signUp

In [None]:
# Save your personal API key in a text file (here: nasa_apikey.txt) and read it in.
# The key is stripped of surrounding whitespace: text editors usually append a
# trailing newline, and a stray newline inside an HTTP header value can cause
# the request to be rejected as an invalid header.
with open('nasa_apikey.txt', 'r') as file:
    nasa_key = file.read().strip()

In [None]:
# request payload; starts out empty
payload = dict()

# the API key travels in the headers handed to requests.get()
headers = {"apikey": nasa_key}

In [None]:
# JSON endpoint the data is requested from (normally listed in the API documentation)
nasa_url = "https://data.nasa.gov/resource/dd9e-wu2v.json"

In [None]:
# Pull the data from the API.
# - `params=` sends the payload as URL query parameters, which is how a GET
#   request carries its options; `data=` would place it in a request body instead.
# - `timeout=` keeps the cell from hanging indefinitely if the server never answers.
# NOTE(review): Socrata-style endpoints usually cap a response at a default page
# size (commonly 1000 rows) — add "$limit" to `payload` if the full catalog is
# needed; confirm against the API documentation.
r = requests.get(nasa_url, headers=headers, params=payload, timeout=30)

In [None]:
# True if the request succeeded, i.e. the response status code is below 400
# (no client or server error)
r.ok

In [None]:
# Stop with a clear HTTP error if the request failed, instead of trying to
# build a data frame out of an error response.
r.raise_for_status()

# Decode the JSON body and turn it into a data frame
landslide = r.json()
landslide_df = pd.DataFrame(landslide)

In [None]:
# (rows, columns) of the data frame built from the API response
landslide_df.shape

### Data cleaning

In [None]:
# Keep only the columns used in the analysis.
# .copy() makes the result an independent data frame, so the column assignments
# in the following cells do not operate on a slice of the original frame
# (which triggers pandas' SettingWithCopyWarning).
desired_columns = [
    'event_date', 'landslide_category', 'landslide_trigger',
    'landslide_size', 'fatality_count', 'injury_count',
    'country_name', 'longitude', 'latitude',
]
landslide_df = landslide_df[desired_columns].copy()

In [None]:
# number of missing values in each column
landslide_df.isnull().sum()

In [None]:
# column dtypes and non-null counts before any type conversion
landslide_df.info()

In [None]:
# cast the count and coordinate columns to float, one column at a time
for numeric_column in ('fatality_count', 'injury_count', 'longitude', 'latitude'):
    landslide_df[numeric_column] = landslide_df[numeric_column].astype(float)

# reduce event_date to its first digits-digits-digits match (the date part),
# then parse the result as datetime
date_part = landslide_df['event_date'].str.extract(r'(\d+\-\d+\-\d+)')
landslide_df['event_date'] = date_part
landslide_df['event_date'] = pd.to_datetime(landslide_df['event_date'])

In [None]:
# replacement value for missing entries, per column:
# labels for the categorical columns, zero for the counts
na_replacements = {
    'landslide_size': 'unknown',
    'landslide_trigger': 'unknown',
    'country_name': 'other/unknown',
    'fatality_count': 0,
    'injury_count': 0,
}
for column_name, replacement in na_replacements.items():
    landslide_df[column_name] = landslide_df[column_name].fillna(replacement)

In [None]:
# re-check column dtypes and non-null counts after the conversions and fills
landslide_df.info()

In [None]:
# preview the first rows of the data frame
landslide_df.head()

In [None]:
# (rows, columns) of the data frame after column selection and cleaning
landslide_df.shape

### Exporting the data

In [None]:
# write the data frame as CSV to a file named "landslide" (no extension) in the
# working directory; index=False leaves the row index out of the file
landslide_df.to_csv("landslide", index = False)

In [None]:
# NOTE(review): pandas is already imported as `pd` at the top of the notebook,
# so this extra import only adds the bare name `read_csv`.
from pandas import read_csv

# read the exported file back in; note that `csv` is also the name of a
# standard-library module
csv = read_csv('landslide')

In [None]:
# Note: a CSV file does not store dtypes, so columns converted earlier
# (e.g. event_date as datetime) have to be converted again after reading it back in
csv.info()

# Exploratory Data Analysis...