In [232]:
import pandas as pd
import requests
from datetime import datetime, timedelta

def fetch_gdelt_data(start_date, end_date, avgtone_index=34, timeout=30):
    """Average one column of the daily GDELT event export for each day in a range.

    For every calendar day from ``start_date`` to ``end_date`` (as generated by
    ``pd.date_range(..., freq='D')``), the archive ``<YYYYMMDD>.export.CSV.zip``
    is requested from the GDELT events server. The archive is downloaded once
    and parsed from the in-memory response body.

    Args:
        start_date: First day of the range; anything ``pd.date_range`` accepts.
        end_date: Last day of the range; anything ``pd.date_range`` accepts.
        avgtone_index: Zero-based position of the column to average. Defaults
            to 34, the position the original code used for average tone
            (NOTE(review): confirm against the GDELT column layout in use).
        timeout: Seconds to wait on each HTTP request before that day is
            reported as an error and skipped.

    Returns:
        ``pd.DataFrame`` with columns ``date`` and ``avg_tone``, one row per
        day whose archive was downloaded and parsed. Both columns are present
        even when no day succeeded. Days with a non-200 response or any error
        are reported with ``print`` and left out.
    """
    import io  # local import so the function does not depend on another cell

    base_url = "http://data.gdeltproject.org/events/"
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    sentiment_data = []

    for single_date in date_range:
        date_str = single_date.strftime("%Y%m%d")
        url = f"{base_url}{date_str}.export.CSV.zip"

        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                print(f"Data not found for {date_str}")
                continue

            # Parse the bytes already downloaded instead of handing the URL to
            # pandas, which would fetch the same archive a second time.
            # Only the averaged column is materialised; with header=None it
            # keeps its positional label, so df[avgtone_index] still works.
            df = pd.read_csv(
                io.BytesIO(response.content),
                compression='zip',
                sep='\t',
                header=None,
                usecols=[avgtone_index],
            )

            sentiment_data.append({
                "date": single_date,
                "avg_tone": df[avgtone_index].mean(),
            })
        except Exception as e:
            # Best effort by design: one bad day must not abort the whole range.
            print(f"Error fetching data for {date_str}: {e}")

    # Explicit columns keep the schema stable when nothing was collected.
    return pd.DataFrame(sentiment_data, columns=["date", "avg_tone"])

# Inclusive window of daily GDELT export files to pull.
start_date, end_date = "2025-01-01", "2025-02-03"

# One download per day in the window; days that fail are reported and skipped.
sentiment_df = fetch_gdelt_data(start_date=start_date, end_date=end_date)

# Plain-text preview of the first five rows.
print(sentiment_df.head(5))


        date  avg_tone
0 2025-01-01 -1.930671
1 2025-01-02 -2.037406
2 2025-01-03 -2.245253
3 2025-01-04 -2.257718
4 2025-01-05 -2.189166


In [233]:
import pandas as pd

In [234]:
# Load the saved TechnicalScorer report table from the relative `reports/`
# directory and rebind the notebook-wide name `df` to it.
df = pd.read_parquet("reports/TechnicalScorer.parquet")
# Bare last expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,date,model,top_tickers,scores,report_md
0,2024-05-17,TechnicalScorer,"[BAX, CRL, GPN, LULU, ARE, AMP]","[4.5454545, 4.5454545, 4.5454545, 4.5454545, 3...",# Trading Advisor Report\n\nGenerated on: 2025...
1,2024-05-18,TechnicalScorer,[],[],# Trading Advisor Report\n\nGenerated on: 2025...
2,2024-05-19,TechnicalScorer,[],[],# Trading Advisor Report\n\nGenerated on: 2025...
3,2024-05-22,TechnicalScorer,"[JBHT, SLB, TGT, ULTA, BMY, BAX]","[6.363636, 6.363636, 6.363636, 6.363636, 5.454...",# Trading Advisor Report\n\nGenerated on: 2025...
4,2024-05-23,TechnicalScorer,"[ALGN, BMY, CZR, CLX, ITW, MCD]","[6.363636, 6.363636, 6.363636, 6.363636, 6.363...",# Trading Advisor Report\n\nGenerated on: 2025...
5,2025-05-23,TechnicalScorer,"[CPRT, KHC, AFL, ARE, AMCR, AVB, CRL, CPAY, FS...","[6.363636, 6.363636, 4.5454545, 4.5454545, 4.5...",# Trading Advisor Report\n\nGenerated on: 2025...
6,2025-05-29,TechnicalScorer,[],[],# Trading Advisor Report\n\nGenerated on: 2025...
7,2025-05-30,TechnicalScorer,[],[],# Trading Advisor Report\n\nGenerated on: 2025...
8,2025-05-31,TechnicalScorer,[],[],# Trading Advisor Report\n\nGenerated on: 2025...
9,2025-06-02,TechnicalScorer,[],[],# Trading Advisor Report\n\nGenerated on: 2025...


In [222]:
# Display the last row of `df`, selected by position.
# NOTE(review): this cell does not load `df` itself; it shows whatever frame the
# kernel had bound to that name when it ran — confirm which one is intended.
df.iloc[-1]

Open                                                      337.515015
High                                                      338.649994
Low                                                       333.209991
Close                                                     337.045013
Volume                                                      25402923
Dividends                                                        0.0
Stock Splits                                                     0.0
RSI                                                        63.058972
MACD                                                       19.993312
MACD_Signal                                                17.951582
MACD_Hist                                                    2.04173
BB_Upper                                                   368.68139
BB_Lower                                                  254.744116
BB_Middle                                                 311.712753
BB_Pband                          

In [219]:
import pandas as pd

# Read the TechnicalScorer report table back from disk.
df = pd.read_parquet("reports/TechnicalScorer.parquet")

# Sanity check: number of rows, then the index label of the first row.
print(df.shape[0])
print(df.index[0])

500
0


# Trading Advisor Report

Generated on: 2025-05-23 16:23:11

## Current Positions

### CRL
**Technical Score:** 3.64/10
**OHLC:** Open: 134.21, High: 135.62, Low: 133.85, Close: 134.79
**Current Position**
**Technical Indicators:**
- RSI: 51.68
- MACD value: 2.77
- MACD signal: 2.03
- MACD histogram: 0.74
- BOLLINGER_BANDS upper: 153.53
- BOLLINGER_BANDS middle: 131.72
- BOLLINGER_BANDS lower: 109.92
- MOVING_AVERAGES sma_20: 131.72
**Analyst Targets:**
- Median: $159.28
- Range: $70.0 - $215.0

### CPAY
**Technical Score:** 2.73/10
**OHLC:** Open: 319.87, High: 323.46, Low: 321.35, Close: 322.78
**Current Position**
**Technical Indicators:**
- RSI: 43.45
- MACD value: 2.13
- MACD signal: 3.71
- MACD histogram: -1.58
- BOLLINGER_BANDS upper: 357.23
- BOLLINGER_BANDS middle: 333.69
- BOLLINGER_BANDS lower: 310.15
- MOVING_AVERAGES sma_20: 333.69
**Analyst Targets:**
- Median: $393.63
- Range: $330.0 - $445.0

### FSLR
**Technical Score:** 2.73/10
**OHLC:** Open: 154.0, High: 160.2, Low: 152.66, Close: 159.44
**Current Position**
**Technical Indicators:**
- RSI: 54.44
- MACD value: 8.79
- MACD signal: 8.99
- MACD histogram: -0.2
- BOLLINGER_BANDS upper: 196.69
- BOLLINGER_BANDS middle: 151.79
- BOLLINGER_BANDS lower: 106.89
- MOVING_AVERAGES sma_20: 151.79
**Analyst Targets:**
- Median: $200.05
- Range: $100.0 - $304.0

### REGN
**Technical Score:** 2.73/10
**OHLC:** Open: 586.85, High: 588.51, Low: 582.37, Close: 587.14
**Current Position**
**Technical Indicators:**
- RSI: 49.53
- MACD value: 0.33
- MACD signal: -4.21
- MACD histogram: 4.54
- BOLLINGER_BANDS upper: 627.57
- BOLLINGER_BANDS middle: 582.97
- BOLLINGER_BANDS lower: 538.36
- MOVING_AVERAGES sma_20: 582.97
**Analyst Targets:**
- Median: $797.21
- Range: $535.0 - $958.0

## New Technical Picks (Top 6) for 2025-05-23 (TechnicalScorer)

### CPRT
**Technical Score:** 6.36/10
**OHLC:** Open: 57.98, High: 58.07, Low: 54.44, Close: 54.53
**Technical Indicators:**
- RSI: 28.19
- MACD value: 0.18
- MACD signal: 0.92
- MACD histogram: -0.74
- BOLLINGER_BANDS upper: 64.9
- BOLLINGER_BANDS middle: 61.23
- BOLLINGER_BANDS lower: 57.56
- MOVING_AVERAGES sma_20: 61.23
**Analyst Targets:**
- Median: $61.51
- Range: $56.0 - $67.0

### KHC
**Technical Score:** 6.36/10
**OHLC:** Open: 26.48, High: 26.52, Low: 26.03, Close: 26.23
**Technical Indicators:**
- RSI: 29.7
- MACD value: -0.68
- MACD signal: -0.53
- MACD histogram: -0.14
- BOLLINGER_BANDS upper: 29.53
- BOLLINGER_BANDS middle: 27.95
- BOLLINGER_BANDS lower: 26.36
- MOVING_AVERAGES sma_20: 27.95
**Analyst Targets:**
- Median: $31.56
- Range: $26.0 - $53.34

### AFL
**Technical Score:** 4.55/10
**OHLC:** Open: 102.0, High: 102.48, Low: 101.73, Close: 101.84
**Technical Indicators:**
- RSI: 38.11
- MACD value: -0.85
- MACD signal: -0.54
- MACD histogram: -0.31
- BOLLINGER_BANDS upper: 108.53
- BOLLINGER_BANDS middle: 105.14
- BOLLINGER_BANDS lower: 101.75
- MOVING_AVERAGES sma_20: 105.14
**Analyst Targets:**
- Median: $106.0
- Range: $94.0 - $120.0

### ARE
**Technical Score:** 4.55/10
**OHLC:** Open: 67.8, High: 68.48, Low: 67.8, Close: 68.2
**Technical Indicators:**
- RSI: 32.39
- MACD value: -2.86
- MACD signal: -2.93
- MACD histogram: 0.07
- BOLLINGER_BANDS upper: 76.64
- BOLLINGER_BANDS middle: 72.43
- BOLLINGER_BANDS lower: 68.21
- MOVING_AVERAGES sma_20: 72.43
**Analyst Targets:**
- Median: $104.46
- Range: $76.0 - $144.0

### AMCR
**Technical Score:** 4.55/10
**OHLC:** Open: 8.95, High: 8.98, Low: 8.92, Close: 8.93
**Technical Indicators:**
- RSI: 39.01
- MACD value: -0.08
- MACD signal: -0.06
- MACD histogram: -0.02
- BOLLINGER_BANDS upper: 9.55
- BOLLINGER_BANDS middle: 9.24
- BOLLINGER_BANDS lower: 8.92
- MOVING_AVERAGES sma_20: 9.24
**Analyst Targets:**
- Median: $11.59
- Range: $10.83 - $13.0

### AVB
**Technical Score:** 4.55/10
**OHLC:** Open: 199.4, High: 199.41, Low: 197.69, Close: 198.42
**Technical Indicators:**
- RSI: 39.48
- MACD value: -1.08
- MACD signal: -0.25
- MACD histogram: -0.83
- BOLLINGER_BANDS upper: 213.46
- BOLLINGER_BANDS middle: 206.39
- BOLLINGER_BANDS lower: 199.31
- MOVING_AVERAGES sma_20: 206.39
**Analyst Targets:**
- Median: $234.0
- Range: $220.0 - $255.0


In [220]:
# Render the current `df` as the cell output.
# NOTE(review): `df` is not assigned in this cell, so what is shown depends on
# which cell last rebound it — confirm the intended source.
df

Unnamed: 0,date,ticker,score
0,2024-05-23 00:00:00,A,2.727273
1,2024-05-23 00:00:00,AAPL,2.727273
2,2024-05-23 00:00:00,ABBV,3.636364
3,2024-05-23 00:00:00,ABNB,2.727273
4,2024-05-23 00:00:00,ABT,0.000000
...,...,...,...
495,2024-05-23 00:00:00,XYL,2.727273
496,2024-05-23 00:00:00,YUM,0.909091
497,2024-05-23 00:00:00,ZBH,3.636364
498,2024-05-23 00:00:00,ZBRA,1.818182


In [237]:
import pandas as pd

# Read the GOOGL feature table from the relative `features/` directory.
df = pd.read_parquet("features/GOOGL_features.parquet")

# Number of rows, followed by the contents of the last row.
print(df.shape[0])
print(df.tail(1).iloc[0])

1262
Open                                                      167.839996
High                                                      169.869995
Low                                                       167.389999
Close                                                     168.990005
Volume                                                      17186772
Dividends                                                        0.0
Stock Splits                                                     0.0
RSI                                                        55.998633
MACD                                                         3.29387
MACD_Signal                                                 2.722749
MACD_Hist                                                   0.571121
BB_Upper                                                  177.615916
BB_Lower                                                  151.999085
BB_Middle                                                   164.8075
BB_Pband                     

In [209]:
# Load the per-ticker TechnicalScorer output for AAPL and rebind `df` to it.
df = pd.read_parquet("model_outputs/TechnicalScorer/AAPL.parquet")

In [210]:
# Show the frame currently bound to `df` (no load happens in this cell).
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,RSI,MACD,MACD_Signal,...,BB_Middle,BB_Pband,SMA_20,SMA_50,SMA_100,SMA_200,EMA_100,EMA_200,analyst_targets,score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-05-26,78.616095,78.795926,76.914974,76.970871,125522000,0.0,0.0,,,,...,,,,,,,,,,0.000000
2020-05-27,76.827477,77.452025,76.086269,77.306213,112945200,0.0,0.0,,,,...,,,,,,,,,,0.000000
2020-05-28,76.980580,78.601508,76.703544,77.340248,133560800,0.0,0.0,,,,...,,,,,,,,,,0.000000
2020-05-29,77.583268,78.044999,76.907680,77.264915,153532400,0.0,0.0,,,,...,,,,,,,,,,0.000000
2020-06-01,77.218738,78.336619,77.087507,78.215111,80791200,0.0,0.0,,,,...,,,,,,,,,,0.909091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-19,207.910004,209.479996,204.259995,208.779999,46140500,0.0,0.0,52.173875,0.768095,-0.535925,...,206.398450,0.599838,206.398450,207.628971,222.960229,225.628424,216.854282,217.715056,,1.818182
2025-05-20,207.669998,208.470001,205.029999,206.860001,42496600,0.0,0.0,50.044535,0.659609,-0.296818,...,206.767529,0.504022,206.767529,207.222529,222.482272,225.576006,216.656376,217.607046,,1.818182
2025-05-21,205.169998,207.039993,200.710007,202.089996,59211800,0.0,0.0,45.117973,0.186582,-0.200138,...,206.655425,0.303764,206.655425,206.853313,221.927385,225.492273,216.367933,217.452647,,2.727273
2025-05-22,200.710007,202.750000,199.699997,201.360001,46633900,0.0,0.0,44.397639,-0.244382,-0.208987,...,206.318570,0.290502,206.318570,206.546597,221.357018,225.457594,216.070746,217.292521,"{""current_price"": 195.9312, ""median_target"": 2...",2.727273


In [215]:
import os
import pandas as pd
from pathlib import Path

features_dir = Path("features")
min_rows = 1257


def prune_short_feature_files(directory, required_rows):
    """Delete feature files that cannot be read or that have too few rows.

    Every ``*_features.parquet`` file directly inside ``directory`` is loaded
    with ``pd.read_parquet``. A file is deleted when loading it fails or when
    it holds fewer than ``required_rows`` rows; a status line is printed for
    every file either way.

    Args:
        directory: Folder containing the ``*_features.parquet`` files.
        required_rows: Minimum number of rows a file needs to be kept.

    Returns:
        List of ``Path`` objects that were deleted, in processing order.

    Raises:
        ImportError: Propagated unchanged from ``pd.read_parquet``. Pandas
            raises it when no parquet engine can be imported, which is an
            environment problem rather than a bad file, so nothing is deleted.
    """
    removed = []
    # Snapshot (and sort) the listing up front so unlinking files cannot
    # disturb a directory scan that is still in progress.
    for file in sorted(Path(directory).glob("*_features.parquet")):
        try:
            row_count = len(pd.read_parquet(file))
        except ImportError:
            # Missing parquet engine: deleting here would wipe the directory.
            raise
        except Exception as e:
            print(f"Error reading {file.name}: {e} (deleting file)")
            file.unlink()
            removed.append(file)
            continue

        if row_count < required_rows:
            print(f"Deleting {file.name}: only {row_count} rows (expected at least {required_rows})")
            file.unlink()
            removed.append(file)
        else:
            print(f"{file.name}: {row_count} rows (OK)")
    return removed


# Assigned so the returned list is not echoed as the cell's output.
deleted_files = prune_short_feature_files(features_dir, min_rows)

HON_features.parquet: 1257 rows (OK)
HUBB_features.parquet: 1257 rows (OK)
FDS_features.parquet: 1257 rows (OK)
AXP_features.parquet: 1257 rows (OK)
NI_features.parquet: 1257 rows (OK)
PSX_features.parquet: 1257 rows (OK)
CBRE_features.parquet: 1257 rows (OK)
ED_features.parquet: 1257 rows (OK)
HLT_features.parquet: 1257 rows (OK)
AMZN_features.parquet: 1257 rows (OK)
ABT_features.parquet: 1257 rows (OK)
GLW_features.parquet: 1257 rows (OK)
MS_features.parquet: 1257 rows (OK)
AVB_features.parquet: 1257 rows (OK)
BMY_features.parquet: 1257 rows (OK)
NTRS_features.parquet: 1257 rows (OK)
SPGI_features.parquet: 1257 rows (OK)
CNP_features.parquet: 1257 rows (OK)
SHW_features.parquet: 1257 rows (OK)
MDLZ_features.parquet: 1257 rows (OK)
Deleting COIN_features.parquet: only 1034 rows (expected at least 1257)
CFG_features.parquet: 1257 rows (OK)
JKHY_features.parquet: 1257 rows (OK)
EQIX_features.parquet: 1257 rows (OK)
CMCSA_features.parquet: 1257 rows (OK)
MTD_features.parquet: 1257 rows (

In [211]:
# Drop the final row by position and rebind `df` to the shortened frame.
# NOTE(review): not idempotent — every re-run without reloading `df` removes one
# more row and rewrites the file below with the shorter frame.
df = df.iloc[:-1]
# Write the shortened frame to the AAPL model-output path (the same path another
# cell in this notebook reads from).
df.to_parquet("model_outputs/TechnicalScorer/AAPL.parquet")
# Print `analyst_targets` of what is now the last row, then the new row count.
print(df.iloc[-1, df.columns.get_loc('analyst_targets')])
print(len(df))

{"current_price": 195.9312, "median_target": 228.75928, "low_target": 170.62, "high_target": 300.0}
1256


In [198]:
# Show the row index of the current `df` as the cell output.
df.index

DatetimeIndex(['2020-05-26', '2020-05-27', '2020-05-28', '2020-05-29',
               '2020-06-01', '2020-06-02', '2020-06-03', '2020-06-04',
               '2020-06-05', '2020-06-08',
               ...
               '2025-05-12', '2025-05-13', '2025-05-14', '2025-05-15',
               '2025-05-16', '2025-05-19', '2025-05-20', '2025-05-21',
               '2025-05-22', '2025-05-23'],
              dtype='datetime64[ns]', name='Date', length=1257, freq=None)

In [192]:
# Print the last row of `df`, selected by position.
# NOTE(review): the saved output is an AttributeError about a `Date` attribute,
# which this statement never accesses — the output looks stale from an earlier
# version of the cell; re-run to confirm.
print(df.iloc[-1])

AttributeError: 'DataFrame' object has no attribute 'Date'

In [179]:
# Copy the last row's `analyst_targets` value into the second-to-last row.
# The destination is addressed by position: row -2, and the integer location of
# the `analyst_targets` column obtained from `get_loc`.
# NOTE(review): this modifies `df` in place, so any earlier display of `df` no
# longer reflects its contents after this cell runs.
df.iloc[-2, df.columns.get_loc('analyst_targets')] = df.iloc[-1]['analyst_targets']

In [180]:
# Print the `analyst_targets` value held in the second-to-last row of `df`.
print(df.iloc[-2, df.columns.get_loc('analyst_targets')])

{"current_price": 195.9312, "median_target": 228.75928, "low_target": 170.62, "high_target": 300.0}


In [181]:
# Drop the final row by position and rebind `df` to the shortened frame.
# NOTE(review): not idempotent — each re-run without reloading `df` removes one
# more row before the file below is rewritten.
df = df.iloc[:-1]
# Write the shortened frame to the AAPL feature file.
df.to_parquet("features/AAPL_features.parquet")
# Print `analyst_targets` of what is now the last row.
print(df.iloc[-1, df.columns.get_loc('analyst_targets')])

{"current_price": 195.9312, "median_target": 228.75928, "low_target": 170.62, "high_target": 300.0}


In [182]:
# Reload the AAPL feature file from disk, replacing the in-memory `df`.
df = pd.read_parquet("features/AAPL_features.parquet")
# Print `analyst_targets` from the last row of the reloaded frame.
print(df.iloc[-1]['analyst_targets'])  # expected: the value manually copied into the former second-to-last row

{"current_price": 195.9312, "median_target": 228.75928, "low_target": 170.62, "high_target": 300.0}


In [186]:
# Print `analyst_targets` from the second-to-last row of the current `df`.
print(df.iloc[-2]['analyst_targets'])

{"current_price": 195.9312, "median_target": 228.75928, "low_target": 170.62, "high_target": 300.0}


In [187]:
# Print `analyst_targets` from the third-to-last row of the current `df`.
print(df.iloc[-3]['analyst_targets'])

None
