In [1]:
import pandas as pd

# Load the Parquet file
parquet_file_path = 'snp_rsi_macd.parquet'
data = pd.read_parquet(parquet_file_path)

# Save to a CSV file
csv_file_path = 'snp_rsi_macd.csv'
data.to_csv(csv_file_path, index=False)


In [7]:
import zipfile

def compress_csv_zip(input_file, output_zip):
    with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.write(input_file, arcname=input_file)

# Example usage
compress_csv_zip('snp_rsi_macd.csv', 'snp_rsi_macd.zip')


In [2]:
data = pd.read_csv('snp_rsi_macd.csv')

In [3]:
# import numpy as np

# # Function to calculate the Exponential Moving Average (EMA)
# def EMA(prices, span=12, adjust=False):
#     return prices.ewm(span=span, adjust=adjust).mean()

# # Function to calculate MACD
# def MACD(prices, span1=12, span2=26, signal_span=9):
#     fast_ema = EMA(prices, span=span1)
#     slow_ema = EMA(prices, span=span2)
#     macd = fast_ema - slow_ema
#     signal = EMA(macd, span=signal_span)
#     histogram = macd - signal
#     return macd, signal, histogram

# # Function to calculate RSI
# def RSI(prices, periods=14):
#     delta = prices.diff()
#     gain = (delta.where(delta > 0, 0)).rolling(window=periods).mean()
#     loss = (-delta.where(delta < 0, 0)).rolling(window=periods).mean()
#     rs = gain / loss
#     rsi = 100 - (100 / (1 + rs))
#     return rsi

# # Process each stock and calculate MACD and RSI
# macd_values = pd.DataFrame()
# rsi_values = pd.DataFrame()

# for stock in data['Stock']:
#     prices = data.filter(regex='^\d{4}-\d{2}-\d{2}', axis=1).loc[data['Stock'] == stock].iloc[0]
#     macd, signal, histogram = MACD(prices)
#     rsi = RSI(prices)
    
#     macd_values = pd.concat([macd_values, macd.rename(stock)])
#     rsi_values = pd.concat([rsi_values, rsi.rename(stock)])

# # Append calculated MACD and RSI to the main dataframe
# macd_values = macd_values.T.add_suffix('_MACD')
# rsi_values = rsi_values.T.add_suffix('_RSI')

# # Combine all data together
# full_data = pd.concat([data.set_index('Stock'), macd_values, rsi_values], axis=1).reset_index()

# # Show some of the MACD and RSI data
# full_data[['Stock', '2024-04-30 00:00:00_MACD', '2024-04-30 00:00:00_RSI']].head()


In [4]:
import pandas as pd
import numpy as np

# Helper functions
def EMA(prices, span=12, adjust=False):
    return prices.ewm(span=span, adjust=adjust).mean()

def MACD(prices, span1=12, span2=26, signal_span=9):
    fast_ema = EMA(prices, span=span1)
    slow_ema = EMA(prices, span=span2)
    macd = fast_ema - slow_ema
    signal = EMA(macd, span=signal_span)
    return macd.iloc[-1], signal.iloc[-1]  # Return the last value for simplicity

def RSI(prices, periods=14):
    delta = prices.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=periods).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=periods).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi.iloc[-1]  # Return the last value for simplicity

# Main function to process data and calculate indicators
def calculate_indicators(data):
    results = pd.DataFrame()
    # Ensure data has only the price columns by filtering non-numeric columns
    numeric_data = data.select_dtypes(include=[np.number])
    # Calculate MACD and RSI
    macd, signal = MACD(numeric_data)
    rsi = RSI(numeric_data)
    # Store results
    results = pd.DataFrame({
        'MACD': macd,
        'Signal': signal,
        'RSI': rsi
    })
    return results


# Ensure 'Stock' column is the index if it exists in the columns
if 'Stock' in data.columns:
    data = data.set_index('Stock')
elif 'Stock' not in data.index:
    print("No 'Stock' column found, please check your DataFrame structure.")

# Calculate MACD and RSI
indicator_results = calculate_indicators(data)

# Display or inspect the results
print(indicator_results.head())


                             MACD        Signal  RSI
2014-05-01 00:00:00 -6.719083e-62 -6.719083e-62  NaN
2014-05-02 00:00:00 -9.786132e-03 -9.786132e-03  NaN
2014-05-05 00:00:00 -1.200990e-02 -1.200990e-02  NaN
2014-05-06 00:00:00  1.109988e-02  1.109988e-02  NaN
2014-05-07 00:00:00  2.853239e-02  2.853239e-02  NaN


In [5]:
import pandas as pd

# Load your data
data = pd.read_csv('snp_rsi_macd.csv')

# Assuming the stock price columns are named as dates and the rest are metadata
price_columns = data.filter(regex=r'\d{4}-\d{2}-\d{2}').columns

# Functions to compute MACD and RSI
def compute_macd(data, fast_period=12, slow_period=26, signal_period=9):
    fast_ema = data.ewm(span=fast_period, adjust=False).mean()
    slow_ema = data.ewm(span=slow_period, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=signal_period, adjust=False).mean()
    return macd_line.iloc[-1], signal_line.iloc[-1]

def compute_rsi(data, periods=14):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).ewm(span=periods, adjust=False).mean()
    loss = (-delta.where(delta < 0, 0)).ewm(span=periods, adjust=False).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs)).iloc[-1]

# Calculate MACD and RSI for each stock
macd = []
signal = []
rsi = []
for index, row in data[price_columns].iterrows():
    macd_value, signal_value = compute_macd(row)
    rsi_value = compute_rsi(row)
    macd.append(macd_value)
    signal.append(signal_value)
    rsi.append(rsi_value)

# Add these values to the original dataframe
data['MACD'] = macd
data['Signal'] = signal
data['RSI'] = rsi

# Save the updated DataFrame to a new CSV file
data.to_csv('snp_beta_macd_signal_rsi.csv', index=False)

# Check the head of the updated DataFrame
print(data[['Stock', 'Industry', 'Beta', 'MACD', 'Signal', 'RSI']].head())


  Stock                      Industry   Beta      MACD    Signal        RSI
0     A        Diagnostics & Research  1.126 -1.536634 -1.509291  45.444335
1   AAL                      Airlines  1.580 -0.129848 -0.153215  39.044930
2  AAPL          Consumer Electronics  1.264 -0.861375 -1.433049  52.048345
3  ABBV  Drug Manufacturers - General  0.593 -2.645659 -2.440588  39.560258
4  ABNB               Travel Services  1.246  0.133508  0.029556  43.219668


In [6]:
import zipfile

def compress_csv_zip(input_file, output_zip):
    with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.write(input_file, arcname=input_file)

# Example usage
compress_csv_zip('snp_beta_macd_signal_rsi.csv', 'snp_beta_macd_signal_rsi.zip')
