### Import dependencies

In [1]:
# Import the various dependencies and setup
import os
import csv
import pandas as pd
from sqlalchemy import create_engine
pd.options.mode.chained_assignment = None

### Store CSV data into DataFrame

In [2]:
# Map the path where the input csv file is located
stocks_csv_file = "Resources/stocks_data.csv"

# Specify the delimiter to read and store the csv file into a Pandas DataFrame
stocks_df = pd.read_csv(stocks_csv_file, encoding = "UTF-8")

# Display the DataFrame columns
stocks_df.head(2)

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
0,7/17/20,"$1,500.84",9329972,"$1,513.45","$1,537.51","$1,490"
1,7/16/20,"$1,500.64",14300790,"$1,477.16","$1,531.71","$1,466"


In [3]:
# Map the path where the input csv file is located
tweets_csv_file = "Resources/tweets_data.csv"

# Specify the delimiter to read and store the csv file into a Pandas DataFrame
tweets_df = pd.read_csv(tweets_csv_file, encoding = "UTF-8")

# Display the DataFrame columns
tweets_df.head(2)

Unnamed: 0,id,conversation_id,created_at,date,time,timezone,user_id,username,name,place,...,geo,source,user_rt_id,user_rt,retweet_id,reply_to,retweet_date,translate,trans_src,trans_dest
0,1.28294e+18,1.28293e+18,1594710000000.0,7/14/20,7:28:03,UTC,44196397,elonmusk,Elon Musk,,...,,,,,,"[{'user_id': '44196397', 'username': 'elonmusk...",,,,
1,1.28284e+18,1.2828e+18,1594690000000.0,7/14/20,1:10:26,UTC,44196397,elonmusk,Elon Musk,,...,,,,,,"[{'user_id': '44196397', 'username': 'elonmusk...",,,,


### Clean the DataFrame to only contain select columns

In [4]:
# Select the columns that are needed for the data transformation
stocks_df = stocks_df[["Date", " Close/Last", " Open", " High", " Low"]]

# Display the DataFrame columns
stocks_df.head(2)

Unnamed: 0,Date,Close/Last,Open,High,Low
0,7/17/20,"$1,500.84","$1,513.45","$1,537.51","$1,490"
1,7/16/20,"$1,500.64","$1,477.16","$1,531.71","$1,466"


In [5]:
# Select the columns that are needed for the data transformation
tweets_df = tweets_df[["date", "tweet"]]

# Display the DataFrame columns
tweets_df.head(2)

Unnamed: 0,date,tweet
0,7/14/20,Cute
1,7/14/20,Wow


### Clean the DataFrame to only contain select rows

In [6]:
# Select tweets containing the word tesla
tweets_df = tweets_df[tweets_df["tweet"].str.contains("tesla")]

# Display the DataFrame columns
tweets_df.head(2)

Unnamed: 0,date,tweet
198,7/1/20,Tesla Impact Report (repost). We do everything...
278,6/21/20,Tesla Bioweapon Defense Mode https://www.tesla...


### Rename DataFrame columns

In [7]:
# Display the DataFrame column names
stocks_df.columns

Index(['Date', ' Close/Last', ' Open', ' High', ' Low'], dtype='object')

In [8]:
# Rename the columns using "inplace = True" to modify the data within the DataFrame
stocks_df.rename(columns = {" Close/Last":"Close", " Open":"Open", " High":"High", " Low":"Low"}, inplace = True)

# Display the DataFrame columns
stocks_df.head(2)

Unnamed: 0,Date,Close,Open,High,Low
0,7/17/20,"$1,500.84","$1,513.45","$1,537.51","$1,490"
1,7/16/20,"$1,500.64","$1,477.16","$1,531.71","$1,466"


In [9]:
# Display the DataFrame column names
tweets_df.columns

Index(['date', 'tweet'], dtype='object')

In [10]:
# Rename the columns using "inplace = True" to modify the data within the DataFrame
tweets_df.rename(columns = {"date":"Date", "tweet":"Tweet"}, inplace = True)

# Display the DataFrame columns
tweets_df.head(2)

Unnamed: 0,Date,Tweet
198,7/1/20,Tesla Impact Report (repost). We do everything...
278,6/21/20,Tesla Bioweapon Defense Mode https://www.tesla...


### Clean DataFrame data format

In [11]:
# Replace the commas in the strings with null. The regex (regular expression) should be set to True
stocks_df = stocks_df.replace(",", "", regex = True)

# Display the DataFrame columns
stocks_df.head(2)

Unnamed: 0,Date,Close,Open,High,Low
0,7/17/20,$1500.84,$1513.45,$1537.51,$1490
1,7/16/20,$1500.64,$1477.16,$1531.71,$1466


In [12]:
# Replace the $ symbol using left strip which removes the leading character in strings
# Using astype cast the resulting strings to a float data type
stocks_df["Close"] = stocks_df["Close"].str.lstrip("$").astype(float)
stocks_df["Open"] = stocks_df["Open"].str.lstrip("$").astype(float)
stocks_df["High"] = stocks_df["High"].str.lstrip("$").astype(float)
stocks_df["Low"] = stocks_df["Low"].str.lstrip("$").astype(float)

# Display the DataFrame columns
stocks_df.head(2)

Unnamed: 0,Date,Close,Open,High,Low
0,7/17/20,1500.84,1513.45,1537.51,1490.0
1,7/16/20,1500.64,1477.16,1531.71,1466.0


### Add calculated columns to DataFrame

In [13]:
# Add a new column "Open_Close" by calculating the variance between the Closing and Opening stock values
stocks_df["Open_Close"] = stocks_df["Close"] - stocks_df["Open"]

# Add a new column "High_Low" by calculating the variance between the High and Low stock values
stocks_df["High_Low"] = stocks_df["High"] - stocks_df["Low"]

# Display the DataFrame columns
stocks_df.head(2)

Unnamed: 0,Date,Close,Open,High,Low,Open_Close,High_Low
0,7/17/20,1500.84,1513.45,1537.51,1490.0,-12.61,47.51
1,7/16/20,1500.64,1477.16,1531.71,1466.0,23.48,65.71


### Reset DataFrame index

In [14]:
# Reset the index of the DataFrame using the drop parameter to avoid the old index being added as a column
stocks_df.reset_index(drop = True)

# Display the DataFrame columns
stocks_df.head(2)

Unnamed: 0,Date,Close,Open,High,Low,Open_Close,High_Low
0,7/17/20,1500.84,1513.45,1537.51,1490.0,-12.61,47.51
1,7/16/20,1500.64,1477.16,1531.71,1466.0,23.48,65.71


In [15]:
# Reset the index of the DataFrame using the drop parameter to avoid the old index being added as a column
tweets_df.reset_index(drop = True)

# Display the DataFrame columns
tweets_df.head(2)

Unnamed: 0,Date,Tweet
198,7/1/20,Tesla Impact Report (repost). We do everything...
278,6/21/20,Tesla Bioweapon Defense Mode https://www.tesla...


### Combine DataFrame

In [16]:
# Merge the two DataFrames using an inner join with "Date" as the key
tesla_df = pd.merge(stocks_df, tweets_df, how = "inner", on = "Date")

# Display the DataFrame columns
tesla_df.head(2)

Unnamed: 0,Date,Close,Open,High,Low,Open_Close,High_Low,Tweet
0,7/1/20,1119.63,1083.0,1135.33,1080.5,36.63,54.83,Tesla Impact Report (repost). We do everything...
1,6/15/20,990.9,917.79,998.84,908.5,73.11,90.34,Tesla Model S now first ever electric vehicle ...


### Store JSON data into a DataFrame

In [17]:
# json_file = "../Resources/customer_location.json"
# customer_location_df = pd.read_json(json_file)
# customer_location_df.head()

### Clean DataFrame

In [18]:
# new_customer_location_df = customer_location_df[["id", "address", "us_state"]].copy()
# new_customer_location_df.head()

### Connect to local database

In [19]:
# rds_connection_string = "<insert user name>:<insert password>@localhost:5432/customer_db"
# engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [20]:
# engine.table_names()

### Use pandas to load csv converted DataFrame into database

In [21]:
# new_customer_data_df.to_sql(name='customer_name', con=engine, if_exists='append', index=False)

### Use pandas to load json converted DataFrame into database

In [22]:
# new_customer_location_df.to_sql(name='customer_location', con=engine, if_exists='append', index=False)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [23]:
# pd.read_sql_query('select * from customer_name', con=engine).head()

### Confirm data has been added by querying the customer_location table

In [24]:
# pd.read_sql_query('select * from customer_location', con=engine).head()