# FX and #Secim2023 Tweet Activity (April–May 2023)

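This notebook downloads daily USD/TRY exchange-rate data for April–June 2023 with yfinance, loads monthly #Secim2023 tweet counts, and compares April–May tweet volume with FX volatility (5-day rolling standard deviation of daily log returns).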


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf

plt.style.use('default')  # basic style

# 1) Download daily USD/TRY data
# yfinance treats `end` as exclusive, so '2023-07-01' makes 30 June the last
# calendar day requested.
df_fx = yf.download(
    'USDTRY=X',
    start='2023-04-01',
    end='2023-07-01',   # 30 June inclusive
    interval='1d'
)

# yfinance typically reports a failed download (network error, rate limit,
# unknown ticker) by returning an empty frame rather than raising. Stop here
# with a clear message instead of failing further down with a confusing
# KeyError on a missing price column.
if df_fx.empty:
    raise RuntimeError(
        'yfinance returned no USD/TRY data for 2023-04-01 to 2023-06-30; '
        'check the network connection and retry.'
    )

print('Raw FX data:')
print(df_fx.head())
print(df_fx.index.min(), '→', df_fx.index.max())

# 2) Put the date index back as a regular column and flatten column labels
df_fx = df_fx.sort_index().reset_index()

# Tuple labels (MultiIndex columns) keep only their first element,
# e.g. ('Close', 'USDTRY=X') → 'Close'; plain labels pass through unchanged.
new_columns = [
    label[0] if isinstance(label, tuple) else label
    for label in df_fx.columns
]
df_fx.columns = new_columns

# Short, lowercase names for the two columns used from here on
df_fx = df_fx.rename(columns={'Close': 'usdtry', 'Date': 'date'})

# 3) Daily log returns and their 5-day rolling standard deviation
# log_return[t] = ln(close[t] / close[t-1])
df_fx['log_return'] = np.log(
    df_fx['usdtry'].div(df_fx['usdtry'].shift(periods=1))
)
df_fx['vol_5d'] = df_fx['log_return'].rolling(5).std()

# Keep only rows that have a log return (the first row has no previous close)
df_fx = df_fx[df_fx['log_return'].notna()].reset_index(drop=True)

# Make sure the date column has a datetime dtype
df_fx['date'] = pd.to_datetime(df_fx['date'])

# 4) Monthly FX summary, restricted to April and May 2023
# Label every row with its calendar month as a 'YYYY-MM' string
df_fx['month'] = df_fx['date'].dt.to_period('M').astype(str)
fx_2m = df_fx.loc[df_fx['month'].isin(['2023-04', '2023-05'])]

# Per month: mean rate, mean log return, and mean / peak 5-day volatility
monthly_fx = fx_2m.groupby('month').agg(
    avg_usdtry=('usdtry', 'mean'),
    avg_log_return=('log_return', 'mean'),
    avg_vol_5d=('vol_5d', 'mean'),
    max_vol_5d=('vol_5d', 'max'),
)
monthly_fx = monthly_fx.reset_index()

print('Monthly FX summary (April–May 2023):')
print(monthly_fx)

# 5) Read the monthly tweet counts from the processed CSV on GitHub
tweet_url = (
    'https://raw.githubusercontent.com/kaantanidir/DSA210-Project'
    '/main/data/processed/tweet_counts.csv'
)
# Cast the month column to string right after loading
tweets = pd.read_csv(tweet_url).astype({'month': str})

print('Monthly tweet counts:')
print(tweets)

# 6) Join tweet counts with the FX summary on the month label
# Both sides use string month keys; the inner join keeps only months present
# in both tables.
monthly_fx = monthly_fx.astype({'month': str})
merged = pd.merge(tweets, monthly_fx, how='inner', on='month')

print('Merged monthly dataset:')
print(merged)

# 7) Correlation analysis
# The FX summary only covers 2023-04 and 2023-05, so with one tweet row per
# month (assumed; confirm against the CSV) `merged` has at most two rows.
# A Pearson correlation over two points is always +1, -1 or NaN, so the
# matrix is printed for completeness but flagged explicitly so that it is not
# read as evidence of a relationship.
corr_matrix = merged[['tweet_count', 'avg_vol_5d', 'max_vol_5d']].corr()
print('Correlation matrix:')
print(corr_matrix)

if len(merged) < 3:
    print(
        f'Note: only {len(merged)} monthly observation(s); correlations over '
        'fewer than 3 points are degenerate (±1 or NaN) and not informative.'
    )

# 8) Plot tweet volume vs volatility (bar + line)
fig, ax1 = plt.subplots(figsize=(10, 6))

# Bar: tweet counts (left y-axis)
ax1.bar(
    merged['month'], merged['tweet_count'],
    alpha=0.6, color='tab:blue', label='Tweet count',
)
ax1.set_xlabel('Month')
ax1.set_ylabel('Tweet Count (#Secim2023)', color='black')

# Line: average 5-day volatility (right y-axis).
# Each Axes starts its own default colour cycle, so without an explicit
# colour the line is drawn in the same blue as the bars.
ax2 = ax1.twinx()
ax2.plot(
    merged['month'], merged['avg_vol_5d'],
    marker='o', linewidth=3, color='tab:red', label='Avg 5-day volatility',
)
ax2.set_ylabel('Average 5-Day Volatility (USD/TRY)', color='black')

# One combined legend for both axes, attached to the twin (top-most) Axes so
# the line cannot be drawn over it.
bar_handles, bar_labels = ax1.get_legend_handles_labels()
line_handles, line_labels = ax2.get_legend_handles_labels()
ax2.legend(bar_handles + line_handles, bar_labels + line_labels, loc='upper left')

plt.title('Tweet Volume vs Exchange Rate Volatility (April–May 2023)')
plt.tight_layout()
plt.show()

# 9) Daily FX plots: one figure per series, all with the same layout
# Each entry is (df_fx column, figure title, y-axis label).
for column, title, ylabel in (
    # 9a) USD/TRY price
    ('usdtry', 'USD/TRY Exchange Rate (2023-04-01 – 2023-06-30)', 'Exchange Rate'),
    # 9b) Daily log returns
    ('log_return', 'USD/TRY Daily Log Returns', 'Log Return'),
    # 9c) 5-day rolling volatility
    ('vol_5d', 'USD/TRY 5-Day Rolling Volatility', 'Volatility (STD of Log Returns)'),
):
    plt.figure(figsize=(10, 4))
    plt.plot(df_fx['date'], df_fx[column])
    plt.title(title)
    plt.xlabel('Date')
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)  # tilt the date tick labels
    plt.tight_layout()
    plt.show()

# 10) (Optional) save processed CSVs into the repo structure
from pathlib import Path

# DataFrame.to_csv does not create missing parent directories, so make sure
# data/processed/ exists (relative to the current working directory) before
# writing; otherwise the export fails when run outside a full repo checkout.
processed_dir = Path('data/processed')
processed_dir.mkdir(parents=True, exist_ok=True)

monthly_fx.to_csv(processed_dir / 'fx_monthly_2023-04_05.csv', index=False)
merged.to_csv(processed_dir / 'tweets_fx_monthly_2023-04_05.csv', index=False)
