In [1]:
import pandas as pd
import numpy as np
import sys
from sqlalchemy import create_engine


In [2]:
def load_data(messages_filepath,categories_filepath):
    '''Load the messages and categories CSV files and combine them.

       Input:
       - messages_filepath: path (or file-like object) of the messages CSV
       - categories_filepath: path (or file-like object) of the categories CSV

       Output:
       - a single DataFrame produced by merging the two tables on their
         shared ``id`` column (pandas' default inner join).
    '''

    # Read both inputs in a fixed order: messages first, then categories.
    sources = (messages_filepath, categories_filepath)
    messages_table, categories_table = [pd.read_csv(source) for source in sources]

    # Combine the two tables on the shared key and hand the result back.
    return pd.merge(messages_table, categories_table, on='id')

In [3]:
def clean_data(df):
    '''Expand the packed ``categories`` column into one numeric column per category.

       Every value of ``df['categories']`` is split on ';' into ``name-value``
       tokens. The column names are taken from the tokens of the first row and
       the values are converted to numbers with ``pd.to_numeric``.

       Input:
       - df: DataFrame holding at least an ``id`` column and a ``categories``
         column of ';'-separated ``name-value`` strings.

       Output:
       - a new DataFrame with one row per distinct ``id`` (first occurrence
         kept), the ``categories`` column removed, the expanded category
         columns appended and a fresh 0..n-1 index. The input frame is not
         modified.
    '''

    # De-duplicate BEFORE expanding. The category columns are derived row by
    # row from this very frame, so they can be attached by position; merging
    # them back on a non-unique 'id' would create k*k rows for every id that
    # occurs k times, only for them to be discarded again afterwards.
    unique_rows = df.drop_duplicates(subset='id', keep='first').reset_index(drop=True)

    # One column per ';'-separated token.
    categories = unique_rows['categories'].str.split(';', expand=True)

    # The first row supplies the names: everything before the last '-'.
    # iloc is positional, so this does not depend on an index label 0.
    categories.columns = categories.iloc[0].str.rsplit('-', n=1).str[0].tolist()

    # Keep the whole text after the last '-' (not just the final character),
    # so multi-digit values such as 'x-10' are parsed as 10 rather than 0.
    for column in categories:
        categories[column] = pd.to_numeric(categories[column].str.rsplit('-', n=1).str[-1])

    # Both frames share the same fresh RangeIndex, so a column-wise concat
    # lines the rows up exactly.
    return pd.concat([unique_rows.drop(columns='categories'), categories], axis=1)
    

In [4]:
def save_data(df, database_filename):

    """
    Saves given dataframe into a table in SQLite database file.
    Any existing 'DisasterData_Clean' table in that file is replaced, so the
    pipeline can be re-run against the same database without raising.
    Input:
    - df: DataFrame <- Pandas DataFrame containing cleaned data of messages and categories
    - database_filename: String <- Location of file where the database file is to be stored
    """
    # Create connection with database
    engine = create_engine('sqlite:///' + database_filename)

    try:
        # Save dataset to database table. The default if_exists='fail' raises
        # ValueError when the table is already present (e.g. on a second run
        # of the notebook), so overwrite it explicitly instead.
        df.to_sql('DisasterData_Clean', engine, index=False, if_exists='replace')
    finally:
        # Release the pooled connection(s) held by the engine, even on error.
        engine.dispose()


In [20]:
def main(messages_filepath=None, categories_filepath=None, database_filename=None):
    """Run the full ETL pipeline: load the CSV files, clean them, store the result.

    Every argument is optional; when one is omitted the value that used to be
    hard-coded here is used, so a bare ``main()`` call behaves as before.

    Input:
    - messages_filepath: String <- location of the messages CSV file
    - categories_filepath: String <- location of the categories CSV file
    - database_filename: String <- SQLite file the cleaned table is written to
    """
    # Fall back to the original locations. NOTE(review): the two CSV defaults
    # are absolute paths on one specific machine; pass explicit paths when
    # running anywhere else.
    if messages_filepath is None:
        messages_filepath='/Users/alisurmeli/Documents/Python_Nanodegree/Project_Disaster Response/messages.csv'
    if categories_filepath is None:
        categories_filepath='/Users/alisurmeli/Documents/Python_Nanodegree/Project_Disaster Response/categories.csv'
    if database_filename is None:
        database_filename='Disaster_response_pipe.db'

    print('Loading data...\n    MESSAGES: {}\n    CATEGORIES: {}'.format(messages_filepath, categories_filepath))
    df = load_data(messages_filepath, categories_filepath)

    print('Cleaning data...')
    df = clean_data(df)

    print('Saving data...\n    DATABASE: {}'.format(database_filename))
    save_data(df, database_filename)

    print('Cleaned data saved to database!')
        

In [21]:
# Entry point: run the ETL pipeline only when this code is executed directly
# (module name is '__main__'), not when it is imported from elsewhere.
if __name__ == '__main__':
    main()
    

Loading data...
    MESSAGES: /Users/alisurmeli/Documents/Python_Nanodegree/Project_Disaster Response/messages.csv
    CATEGORIES: /Users/alisurmeli/Documents/Python_Nanodegree/Project_Disaster Response/categories.csv
Cleaning data...
Saving data...
    DATABASE: disaster_response_pipe.db
Cleaned data saved to database!
