In [1]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_colwidth', 100)

# Stock prices

## Import Stock prices

In [2]:
stock_prices = pd.read_csv('../../../data/team/Stock Price Time Series/stock_prices_time_series_converged.csv')

In [3]:
# Parse the raw 'timestamp' values into a new 'Date' column; anything that
# cannot be parsed becomes NaT (errors='coerce') instead of raising.
parsed_dates = pd.to_datetime(stock_prices['timestamp'], errors='coerce')
stock_prices['Date'] = parsed_dates

In [4]:
# Remove four columns that are not used anywhere below.
# NOTE(review): 'Unnamed: 9/10', 'mean delta' and '-0.073169045' look like
# stray cells from a spreadsheet export -- confirm against the CSV.
stock_prices = (
    stock_prices
    .drop(columns=['Unnamed: 9', 'Unnamed: 10'])
    .drop(columns=['mean delta', '-0.073169045'])
)

In [5]:
# Index the price rows by their parsed date so later cells can merge and
# slice on 'Date'.
stock_prices = stock_prices.set_index('Date')

In [6]:
# The raw 'timestamp' column is redundant now that 'Date' is the index.
stock_prices = stock_prices.drop(columns='timestamp')

## Export GCTAY close prices

In [7]:
stock_prices['Ticker'].unique()

array(['SPWR', 'COENF', 'DNNGY', 'DQ', 'ENPH', 'FSLR', 'GCTAY', 'ORA',
       'RUN', 'SCTY', 'TSLA', 'VWDRY'], dtype=object)

In [8]:
# Select only the GCTAY rows; .copy() makes the result an independent frame
# rather than a view-like slice of stock_prices.
is_gctay = stock_prices['Ticker'] == 'GCTAY'
stock_prices_gctay = stock_prices.loc[is_gctay].copy()

In [9]:
# Order the GCTAY rows chronologically (oldest date first).
stock_prices_gctay = stock_prices_gctay.sort_index(ascending=True)

In [10]:
# Keep just the closing price, as a one-column DataFrame indexed by date.
gctay_closing_daily = stock_prices_gctay['close'].to_frame()

In [11]:
gctay_closing_daily

Unnamed: 0_level_0,close
Date,Unnamed: 1_level_1
2009-02-05,3.290
2009-02-06,3.290
2009-02-09,3.290
2009-02-10,3.500
2009-02-11,3.500
...,...
2021-04-19,7.150
2021-04-20,6.990
2021-04-21,7.272
2021-04-22,7.670


# Import Sentiment 

In [12]:
sentiment = pd.read_csv('../../../data/team/NLTK Time Series/sentiment_nltk_fixed_final.csv')

In [13]:
# Drop the 'Unnamed: 0' column.
# NOTE(review): presumably a row index that was written into the CSV -- confirm.
sentiment = sentiment.drop(columns='Unnamed: 0')

In [14]:
# Take the first 10 characters of 'CreatedAt' and parse them as a date;
# values that fail to parse become NaT (errors='coerce').
# NOTE(review): assumes those 10 characters are the YYYY-MM-DD prefix -- confirm.
created_day = sentiment['CreatedAt'].str.slice(stop=10)
sentiment['Date'] = pd.to_datetime(created_day, errors='coerce')

In [15]:
# Index the tweets by calendar date so they can be grouped per day below.
sentiment = sentiment.set_index('Date')

In [16]:
# 'CreatedAt' is no longer needed once 'Date' has been derived from it.
sentiment = sentiment.drop(columns='CreatedAt')

In [17]:
sentiment

Unnamed: 0_level_0,company,TweetText,Tweet_lemmatized,neg,neu,pos,compound
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-04-01,first_solar,First Solar Completes Sale of North American O&amp;M Business to NovaSource $FSLR https://t.co/j...,"['first', 'solar', 'completes', 'sale', 'north', 'american', 'oampm', 'business', 'novasource', ...",0.000,0.000,0.000,0.0000
2021-03-31,first_solar,$ALYI To Reveal New Significant Investment In Company From Leading Texas Based Private Equity Fu...,"['alyi', 'reveal', 'new', 'significant', 'investment', 'company', 'leading', 'texas', 'based', '...",0.000,1.000,0.000,0.0000
2021-03-31,first_solar,@SolarPowerEU @renewableH2EU @EU_Commission @Akuo_Energy @enelgreenpower @EdpRenewables @Enapter...,"['solarpowereu', 'renewableheu', 'eucommission', 'akuoenergy', 'enelgreenpower', 'edprenewables'...",0.000,0.927,0.073,0.2023
2021-03-31,first_solar,$FSLR sector move confirmed https://t.co/h08Dwn5CIF,"['fslr', 'sector', 'move', 'confirmed', 'httpstcohdwncif']",0.081,0.848,0.071,-0.0754
2021-03-31,first_solar,$FSLR NovaSource Power Services is World's Largest Solar O&amp;M Provider with Acquisition of Fi...,"['fslr', 'novasource', 'power', 'service', 'world', 'largest', 'solar', 'oampm', 'provider', 'ac...",0.000,1.000,0.000,0.0000
...,...,...,...,...,...,...,...
2011-01-10,meridian_energy,Panic Attacks Cure: Anxiety And Panic Attacks Cure - Energy Healing - Meridian Energy Therapy - ...,"['panic', 'attack', 'cure', 'anxiety', 'panic', 'attack', 'cure', 'energy', 'healing', 'meridian...",0.537,0.222,0.241,-0.8402
2011-01-07,meridian_energy,Just had some Meridian Energy guys come around saying they could save me a lot money. Said I was...,"['meridian', 'energy', 'guy', 'come', 'around', 'saying', 'could', 'save', 'lot', 'money', 'said...",0.537,0.222,0.241,-0.8402
2011-01-06,meridian_energy,EFT Cafe - Gratitude Tapping: AAMET - Association for the Advancement of Meridian Energy Techniq...,"['eft', 'cafe', 'gratitude', 'tapping', 'aamet', 'association', 'advancement', 'meridian', 'ener...",0.122,0.609,0.269,0.4393
2011-01-02,meridian_energy,Anxiety and Panic Attacks Cure - Energy Healing - Meridian Energy Therapy - Quantum Solutions ht...,"['anxiety', 'panic', 'attack', 'cure', 'energy', 'healing', 'meridian', 'energy', 'therapy', 'qu...",0.000,0.649,0.351,0.6597


## Create daily tweet count 

In [18]:
sentiment['company'].unique()

array(['first_solar', 'siemens_gamesa', 'plug_power', 'tesla', 'sunrun',
       'sunpower', 'meridian_energy', nan], dtype=object)

In [19]:
# Tweets for GCTAY are the rows labelled 'siemens_gamesa' in the company column.
is_siemens_gamesa = sentiment['company'] == 'siemens_gamesa'
gctay_tweets = sentiment.loc[is_siemens_gamesa]

In [20]:
# Count non-null entries of every column per calendar day.
tweets_by_day = gctay_tweets.groupby('Date')
gctay_tweets_daily = tweets_by_day.count()

In [21]:
# Use the per-day count of the 'compound' column as the daily tweet count.
gctay_tweets_daily = gctay_tweets_daily['compound'].to_frame()

In [22]:
# The column now holds counts, not scores, so give it a matching name.
gctay_tweets_daily = gctay_tweets_daily.rename(columns={'compound': 'Daily Tweets'})

In [23]:
gctay_tweets_daily

Unnamed: 0_level_0,Daily Tweets
Date,Unnamed: 1_level_1
2011-01-01,2
2011-01-02,2
2011-01-03,5
2011-01-04,6
2011-01-05,5
...,...
2021-03-28,51
2021-03-29,118
2021-03-30,129
2021-03-31,129


## Create compound sentiment mean

In [24]:
# Start again from the 'siemens_gamesa' tweets, this time for their scores.
siemens_gamesa_rows = sentiment['company'] == 'siemens_gamesa'
gctay_sentiment = sentiment.loc[siemens_gamesa_rows]

In [25]:
# Keep only the 'compound' sentiment score, as a one-column DataFrame.
gctay_sentiment = gctay_sentiment['compound'].to_frame()

In [26]:
# Average the compound score over all tweets that share a calendar day.
sentiment_by_day = gctay_sentiment.groupby('Date')
gctay_sentiment_daily = sentiment_by_day.mean()

In [27]:
gctay_sentiment_daily

Unnamed: 0_level_0,compound
Date,Unnamed: 1_level_1
2011-01-01,0.660500
2011-01-02,0.273200
2011-01-03,0.470360
2011-01-04,0.322400
2011-01-05,0.477660
...,...
2021-03-28,0.480627
2021-03-29,0.280736
2021-03-30,0.430319
2021-03-31,0.327564


# Merge GCTAY datasets

In [28]:
# Left-join the daily tweet counts onto the closing prices by date, keeping
# every price row; days without tweets get NaN in 'Daily Tweets'.
gctay = gctay_closing_daily.merge(
    gctay_tweets_daily,
    how='left',
    left_on=['Date'],
    right_on=['Date'],
)

In [29]:
# Discard rows with any missing value: after the left merge this removes
# price rows that have no matching tweet count (and any row without a close).
gctay = gctay.dropna()

In [30]:
# Left-join the daily mean sentiment onto the price/tweet-count rows by date.
gctay = gctay.merge(
    gctay_sentiment_daily,
    how='left',
    left_on=['Date'],
    right_on=['Date'],
)

In [31]:
gctay

Unnamed: 0_level_0,close,Daily Tweets,compound
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-01-03,1.6900,5.0,0.470360
2011-01-04,1.6900,6.0,0.322400
2011-01-05,1.6900,5.0,0.477660
2011-01-06,1.6900,3.0,0.391933
2011-01-07,1.6900,4.0,0.241975
...,...,...,...
2021-03-26,6.8900,95.0,0.331604
2021-03-29,6.9400,118.0,0.280736
2021-03-30,7.3225,129.0,0.430319
2021-03-31,7.7400,129.0,0.327564


In [32]:
# Give the merged columns presentation-friendly names; the normalization
# helpers and plots below refer to these exact labels.
display_names = {'close': 'Closing Price', 'compound': 'Sentiment Score'}
gctay = gctay.rename(columns=display_names)

In [33]:
gctay

Unnamed: 0_level_0,Closing Price,Daily Tweets,Sentiment Score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-01-03,1.6900,5.0,0.470360
2011-01-04,1.6900,6.0,0.322400
2011-01-05,1.6900,5.0,0.477660
2011-01-06,1.6900,3.0,0.391933
2011-01-07,1.6900,4.0,0.241975
...,...,...,...
2021-03-26,6.8900,95.0,0.331604
2021-03-29,6.9400,118.0,0.280736
2021-03-30,7.3225,129.0,0.430319
2021-03-31,7.7400,129.0,0.327564


In [35]:
# gctay.to_csv('./gctay_closing_popularity_sentiment.csv')

# Normalized GCTAY data

In [None]:
gctay_normalized = gctay.copy()

In [None]:
gctay_normalized

In [None]:
def _min_max_scale(df, column):
    """Min-max rescale ``df[column]`` to the [0, 1] range and return ``df``.

    The column is overwritten in place on ``df``; pass a copy if the original
    values must be preserved. Missing values stay missing.

    A column whose maximum equals its minimum has no range to scale by.
    Dividing by that zero range would turn the whole column into NaN, so such
    a column is mapped to 0.0 instead.
    """
    values = df[column]
    lowest = values.min()
    span = values.max() - lowest
    shifted = values - lowest
    if span == 0:
        # Constant column: every value equals the minimum, so `shifted` is
        # already all zeros (NaN where the input was NaN).
        df[column] = shifted.astype(float)
    else:
        df[column] = shifted / span
    return df


def normalize_daily_tweets(df):
    """Rescale the 'Daily Tweets' column of ``df`` to [0, 1] in place.

    Returns the same ``df`` object that was passed in.
    """
    return _min_max_scale(df, 'Daily Tweets')


def normalize_sentiment_score(df):
    """Rescale the 'Sentiment Score' column of ``df`` to [0, 1] in place.

    Returns the same ``df`` object that was passed in.
    """
    return _min_max_scale(df, 'Sentiment Score')


def normalize_closing_price(df):
    """Rescale the 'Closing Price' column of ``df`` to [0, 1] in place.

    Returns the same ``df`` object that was passed in.
    """
    return _min_max_scale(df, 'Closing Price')

In [None]:
gctay_normalized = normalize_daily_tweets(gctay_normalized)

In [None]:
gctay_normalized = normalize_sentiment_score(gctay_normalized)

In [None]:
gctay_normalized =  normalize_closing_price(gctay_normalized)

In [None]:
gctay_normalized

In [None]:
gctay_normalized.to_csv('./gctay_normalized_closing_popularity_sentiment.csv')

In [None]:
# Plot normalized sentiment against normalized closing price for the
# 2020-12-01 .. 2021-04-01 window. Rows and columns are selected in a single
# .loc call rather than chained [] lookups, and the trailing ';' keeps the
# Axes repr out of the cell output (matching the other plot cell).
gctay_normalized.loc['2020-12-01':'2021-04-01', ['Sentiment Score', 'Closing Price']].plot(figsize=(18, 5));

In [None]:
# Plot normalized daily tweet volume against normalized closing price for
# the 2020-03-01 .. 2021-04-01 window.
tweets_vs_price = gctay_normalized.loc['2020-03-01':'2021-04-01', ['Daily Tweets', 'Closing Price']]
tweets_vs_price.plot(figsize=(12, 8));