In [1]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_colwidth', 100)


# Import data

In [2]:
# Load the two weekly inputs; both paths are relative to the current working directory.
close_price_csv = './weekly_mean_closing_price_by_ticker_all.csv'
tweet_count_csv = './weekly_tweet_count_all.csv'
weekly_close_price = pd.read_csv(close_price_csv)
weekly_tweet_count = pd.read_csv(tweet_count_csv)

## Select time period: 2016-01 to 2021-13 (Year-Week) 

### Close prices

In [3]:
# Index the price table by its 'Year-Week' column, keep the 2016-01..2021-13
# window (label slices include both end points), and drop 'DNNGY' and 'SCTY',
# which have NaN values when testing from 2016.
# Written as one non-mutating chain: every step returns a new frame, so nothing
# is modified in place on a slice (which can raise SettingWithCopyWarning).
weekly_close_price = (
    weekly_close_price
    .set_index('Year-Week')
    .loc['2016-01':'2021-13']
    .drop(columns=['DNNGY', 'SCTY'])
)

### Weekly Tweet Count

In [4]:
# Index the tweet counts by 'Year-Week', then keep the 2016-01..2021-13 window.
tweets_by_week = weekly_tweet_count.set_index('Year-Week')
weekly_tweet_count = tweets_by_week['2016-01':'2021-13']

# Merge stock closing prices and Twitter popularity

In [5]:
# Show which columns the tweet-count table offers for merging.
weekly_tweet_count.columns

Index(['Enphase Energy', 'First Solar', 'Siemens', 'Plug Power', 'Sunrun',
       'Sunpower', 'Meridian'],
      dtype='object')

In [6]:
# Show which columns remain in the price table.
weekly_close_price.columns

Index(['COENF', 'DQ', 'ENPH', 'FSLR', 'GCTAY', 'ORA', 'RUN', 'SPWR', 'TSLA',
       'VWDRY'],
      dtype='object')

## Merge datasets of Enphase Energy

In [9]:
# Pair the 'Enphase Energy' tweet counts with the 'ENPH' closing price per week.
# The left merge on the shared 'Year-Week' index level keeps every tweet week;
# rows with a missing value in either column are then dropped.
enphase = (
    pd.merge(
        weekly_tweet_count['Enphase Energy'].to_frame(),
        weekly_close_price['ENPH'],
        how='left',
        on='Year-Week',
    )
    .rename(columns={"Enphase Energy": "Weekly Tweets", "ENPH": "close"})
    .dropna()
)

## Merge datasets of First Solar

In [14]:
# Pair the 'First Solar' tweet counts with the 'FSLR' closing price per week.
# The left merge on the shared 'Year-Week' index level keeps every tweet week;
# rows with a missing value in either column are then dropped.
first_solar = (
    pd.merge(
        weekly_tweet_count['First Solar'].to_frame(),
        weekly_close_price['FSLR'],
        how='left',
        on='Year-Week',
    )
    .rename(columns={"First Solar": "Weekly Tweets", "FSLR": "close"})
    .dropna()
)

## Merge datasets of Siemens

In [15]:
# Pair the 'Siemens' tweet counts with the 'GCTAY' closing price per week.
# The left merge on the shared 'Year-Week' index level keeps every tweet week;
# rows with a missing value in either column are then dropped.
siemens = (
    pd.merge(
        weekly_tweet_count['Siemens'].to_frame(),
        weekly_close_price['GCTAY'],
        how='left',
        on='Year-Week',
    )
    .rename(columns={"Siemens": "Weekly Tweets", "GCTAY": "close"})
    .dropna()
)

Unnamed: 0_level_0,Weekly Tweets,close
Year-Week,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01,843.0,68.0220
2016-02,463.0,63.0220
2016-03,538.0,63.1075
2016-04,340.0,66.3440
2016-05,258.0,66.8040
...,...,...
2021-09,236.0,79.2040
2021-10,372.0,77.3620
2021-11,177.0,80.5420
2021-12,238.0,81.8980


## Lag analysis: Enphase

In [None]:
# Lag analysis for Enphase: cross-correlation of weekly tweet counts with the
# weekly closing price (top) and autocorrelation of the tweet counts (bottom),
# both over lags of -MAX_LAGS..+MAX_LAGS weeks.
MAX_LAGS = 50  # number of weekly lags shown on each side of lag 0

# Subtract each series' mean first: xcorr/acorr apply no detrending by default,
# so on raw levels the normalised values mostly reflect the series means
# rather than how the two series move together.
tweets = enphase['Weekly Tweets'].to_numpy(dtype=float)
close = enphase['close'].to_numpy(dtype=float)
tweets_centered = tweets - tweets.mean()
close_centered = close - close.mean()

fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True)

ax1.xcorr(tweets_centered, close_centered, usevlines=True, maxlags=MAX_LAGS, normed=True, lw=2)
ax1.set_title('Enphase: weekly tweets vs. closing price (cross-correlation)')
ax1.set_ylabel('Correlation')
ax1.grid(True)

ax2.acorr(tweets_centered, usevlines=True, normed=True, maxlags=MAX_LAGS, lw=2)
ax2.set_title('Enphase: weekly tweets (autocorrelation)')
ax2.set_xlabel('Lag (weeks)')
ax2.set_ylabel('Correlation')
ax2.grid(True)

fig.tight_layout()
plt.show()

In [8]:
# Display the merged Enphase frame ('Weekly Tweets' and 'close' per Year-Week).
enphase

Unnamed: 0_level_0,Weekly Tweets,close
Year-Week,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01,327,3.1980
2016-02,205,2.5100
2016-03,252,2.2625
2016-04,206,2.3300
2016-05,218,2.2700
...,...,...
2021-09,1002,162.5380
2021-10,1189,152.6480
2021-11,634,161.5440
2021-12,750,153.1220
