In [1]:
import pandas as pd
from path import Path

In [2]:
def csv_to_df(stock_csv, sentiment_results_csv):
    file_path1 = Path(stock_csv)
    file_path2 = Path(sentiment_results_csv)
    stock_df = pd.read_csv(file_path1)
    sentiment_result = pd.read_csv(file_path2)
    
    return stock_df, sentiment_result

In [3]:
def add_count(sentiment_result_df): 
    a=sentiment_result_df.groupby("created_at").size().values
    sentiment_result_df= sentiment_result_df.drop_duplicates(subset="created_at").assign(Count=a)
    
    return sentiment_result_df

In [4]:
def split_df(sentiment_result_df): 
    df_count = sentiment_result_df[["created_at", "Count"]]
    df_compound = sentiment_result_df[["created_at", "compound"]]
    
    return df_count, df_compound

In [5]:
#Calculation of the mean polarity/day
def calculate_mean(sent_results_df):
    sent_results_df['Date'] = pd.to_datetime(sent_results_df.created_at, format='%Y-%m-%d')
    compound_mean = sent_results_df.groupby(sent_results_df['Date'].dt.date).mean()
    
    return compound_mean

In [6]:
#Calculation of the sum of tweets/day
def calculate_sum(sent_results_df):
    sent_results_df['Date'] = pd.to_datetime(sent_results_df.created_at, format='%Y-%m-%d')
    count_sum = sent_results_df.groupby(sent_results_df['Date'].dt.date).sum()
    
    return count_sum

In [7]:
#Combination of polarity mean and count sum
def join_dfs(compound_df, count_df):
    sentiment_result_df = pd.concat([compound_df, count_df], axis=1)
    
    return sentiment_result_df

In [8]:
#Merge of stock data and sentiment results
def merge_dfs(stock_df, sum_sent):
    stock_df['Date'] = pd.to_datetime(stock_df.Date, format='%Y-%m-%d')
    stock_df = stock_df.set_index("Date")
    input_df = pd.concat([stock_df, sum_sent], axis=1)
    
    return input_df

In [9]:
def create_csv(input_df, csv_name):
    input_df.to_csv(csv_name, encoding='utf-8', index=True)

In [10]:
aapl_stock_df, aapl_sent_result_df = csv_to_df("../stock_data/aapl.csv", "../sentiment_analysis/sentiment_analysis/aapl_sent_results.csv")
btc_stock_df, btc_sent_result_df = csv_to_df("../stock_data/btc.csv", "../sentiment_analysis/sentiment_analysis/btc_sent_results.csv")
jnj_stock_df, jnj_sent_result_df = csv_to_df("../stock_data/jnj.csv", "../sentiment_analysis/sentiment_analysis/jnj_sent_results.csv")
msft_stock_df, msft_sent_result_df = csv_to_df("../stock_data/msft.csv", "../sentiment_analysis/sentiment_analysis/msft_sent_results.csv")
nflx_stock_df, nflx_sent_result_df = csv_to_df("../stock_data/nflx.csv", "../sentiment_analysis/sentiment_analysis/nflx_sent_results.csv")
pfe_stock_df, pfe_sent_result_df = csv_to_df("../stock_data/pfe.csv", "../sentiment_analysis/sentiment_analysis/pfe_sent_results.csv")
tsla_stock_df, tsla_sent_result_df = csv_to_df("../stock_data/tsla.csv", "../sentiment_analysis/sentiment_analysis/tsla_sent_results.csv")
twr_stock_df, twr_sent_result_df = csv_to_df("../stock_data/twr.csv", "../sentiment_analysis/sentiment_analysis/twr_sent_results.csv")

In [11]:
aapl_sent_result_df = add_count(aapl_sent_result_df)
btc_sent_result_df = add_count(btc_sent_result_df)
jnj_sent_result_df = add_count(jnj_sent_result_df)
msft_sent_result_df = add_count(msft_sent_result_df)
nflx_sent_result_df = add_count(nflx_sent_result_df)
pfe_sent_result_df = add_count(pfe_sent_result_df)
tsla_sent_result_df = add_count(tsla_sent_result_df)
twr_sent_result_df = add_count(twr_sent_result_df)

In [12]:
aapl_tweet_count, aapl_compound = split_df(aapl_sent_result_df)
btc_tweet_count, btc_compound = split_df(btc_sent_result_df)
jnj_tweet_count, jnj_compound = split_df(jnj_sent_result_df)
msft_tweet_count, msft_compound = split_df(msft_sent_result_df)
nflx_tweet_count, nflx_compound = split_df(nflx_sent_result_df)
pfe_tweet_count, pfe_compound = split_df(pfe_sent_result_df)
tsla_tweet_count, tsla_compound = split_df(tsla_sent_result_df)
twr_tweet_count, twr_compound = split_df(twr_sent_result_df)

In [13]:
aapl_compound_mean = calculate_mean(aapl_compound)
btc_compound_mean = calculate_mean(btc_compound)
jnj_compound_mean = calculate_mean(jnj_compound)
msft_compound_mean = calculate_mean(msft_compound)
nflx_compound_mean = calculate_mean(nflx_compound)
pfe_compound_mean = calculate_mean(pfe_compound)
tsla_compound_mean = calculate_mean(tsla_compound)
twr_compound_mean = calculate_mean(twr_compound)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [14]:
aapl_count_sum = calculate_sum(aapl_tweet_count)
btc_count_sum = calculate_sum(btc_tweet_count)
jnj_count_sum = calculate_sum(jnj_tweet_count)
msft_count_sum = calculate_sum(msft_tweet_count)
nflx_count_sum = calculate_sum(nflx_tweet_count)
pfe_count_sum = calculate_sum(pfe_tweet_count)
tsla_count_sum = calculate_sum(tsla_tweet_count)
twr_count_sum = calculate_sum(twr_tweet_count)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [15]:
sum_aapl_sent = join_dfs(aapl_compound_mean, aapl_count_sum)
sum_btc_sent = join_dfs(btc_compound_mean, btc_count_sum)
sum_jnj_sent = join_dfs(jnj_compound_mean, jnj_count_sum)
sum_msft_sent = join_dfs(msft_compound_mean, msft_count_sum)
sum_nflx_sent = join_dfs(nflx_compound_mean, nflx_count_sum)
sum_pfe_sent = join_dfs(pfe_compound_mean, pfe_count_sum)
sum_tsla_sent = join_dfs(tsla_compound_mean, tsla_count_sum)
sum_twr_sent = join_dfs(twr_compound_mean, twr_count_sum)

In [16]:
aapl_input_df = merge_dfs(aapl_stock_df, sum_aapl_sent)
btc_input_df = merge_dfs(btc_stock_df, sum_btc_sent)
jnj_input_df = merge_dfs(jnj_stock_df, sum_jnj_sent)
msft_input_df = merge_dfs(msft_stock_df, sum_msft_sent)
nflx_input_df = merge_dfs(nflx_stock_df, sum_nflx_sent)
pfe_input_df = merge_dfs(pfe_stock_df, sum_pfe_sent)
tsla_input_df = merge_dfs(tsla_stock_df, sum_tsla_sent)
twr_input_df = merge_dfs(twr_stock_df, sum_twr_sent)

In [17]:
create_csv(aapl_input_df, "aapl_input.csv")
create_csv(btc_input_df, "btc_input.csv")
create_csv(jnj_input_df, "jnj_input.csv") 
create_csv(msft_input_df, "msft_input.csv")
create_csv(nflx_input_df, "nflx_input.csv")
create_csv(pfe_input_df, "pfe_input.csv")
create_csv(tsla_input_df, "tsla_input.csv")
create_csv(twr_input_df, "twr_input.csv")