In [4]:
import pandas as pd

In [17]:
# Load the cleaned panel. `ncusip` is forced to the pandas string dtype instead of
# letting read_csv infer a type for it.
df = pd.read_csv('../green cleaned.csv', dtype={'ncusip': 'string'})

# Forward excess return per stock: each row receives the `ret_excess` of the row
# that follows it within the same `permno`; the last row of each stock gets NaN.
# shift(-1) works on row position, so the rows must be in date order when the
# shift is taken. The sort is applied to a temporary view only; the assignment
# aligns on the index, so the row order of `df` itself is left untouched.
# NOTE(review): assumes `datadate` sorts chronologically as loaded (ISO
# yyyy-mm-dd text does) — confirm against the CSV.
# NOTE(review): "next row" is the next available observation, which is only the
# next calendar period if the panel has no gaps — confirm.
df['ret_fwd_1'] = (
    df.sort_values(['permno', 'datadate'], kind='stable')
      .groupby('permno')['ret_excess']
      .shift(-1)
)

In [18]:
# Keep only the identifier columns, the two signals that are ranked later (bm, gma)
# and the forward return.
# .copy() makes the result an independent frame, so later column assignments on
# `df` do not operate on a selection of the loaded frame (which pandas may flag
# with SettingWithCopyWarning).
df = df[['datadate', 'permno', 'ticker', 'comnam', 'bm', 'gma', 'ret_fwd_1']].copy()

In [19]:
# Preview the trimmed frame; the rendered output is abbreviated to the first and
# last rows.
df

Unnamed: 0,datadate,permno,ticker,comnam,bm,gma,ret_fwd_1
0,1980-01-31,10006,ACF,A C F INDUSTRIES INC,0.017724,-0.553669,-0.067695
1,1980-02-29,10006,ACF,A C F INDUSTRIES INC,0.019257,-0.554980,-0.184178
2,1980-03-31,10006,ACF,A C F INDUSTRIES INC,0.016578,-0.553915,-0.040051
3,1980-04-30,10006,ACF,A C F INDUSTRIES INC,0.011745,-0.555318,0.076577
4,1980-05-31,10006,ACF,A C F INDUSTRIES INC,0.013093,-0.553012,-0.013676
...,...,...,...,...,...,...,...
269250,2024-08-31,93436,TSLA,TESLA INC,-0.847287,-0.052311,0.217942
269251,2024-09-30,93436,TSLA,TESLA INC,-0.852062,-0.050266,-0.048925
269252,2024-10-31,93436,TSLA,TESLA INC,-0.852640,-0.042799,0.377469
269253,2024-11-30,93436,TSLA,TESLA INC,-0.846898,-0.047995,0.166308


In [20]:
import pandas as pd
import os

# 1. Setup: Ensure date is datetime.
#    `assign` returns a new frame instead of writing a column into `df` in place,
#    which keeps this cell safe to re-run and avoids a SettingWithCopyWarning when
#    `df` is itself the result of a column selection.
df = df.assign(datadate=pd.to_datetime(df['datadate']))

# 2. Define the years you want to generate files for
# (Adjust this range if you need more years)
target_years = [2020, 2021, 2022, 2023, 2024]

# Number of stocks in each leg (buys and sells).
N_PER_LEG = 150

# Columns written to each output CSV.
OUTPUT_COLS = ['datadate', 'permno', 'ticker', 'comnam', 'ret_fwd_1']

# Calendar date of every row with any time-of-day component stripped, so the
# signal-date filter below also matches rows whose timestamp is not exactly
# midnight. Computed once because it does not depend on the loop variable.
signal_dates = df['datadate'].dt.normalize()

print(f"Processing Novy-Marx portfolios for: {target_years}\n")

for year in target_years:
    # ---------------------------------------------------------
    # A. Identify the 'Signal Date' (Dec 31 of previous year)
    #    Example: For Year 2020, we want data from 2019-12-31
    # ---------------------------------------------------------
    prev_year = year - 1

    # Note: Adjust logic if your data uses Jan 31st for the current year.
    target_date = pd.Timestamp(f'{prev_year}-12-31')

    # Cross-section of all rows dated on the signal date (copied because a score
    # column is added below).
    subset = df[signal_dates == target_date].copy()

    # Check if data exists for this date
    if subset.empty:
        print(f"⚠️ Warning: No data found for {target_date.date()}. Skipping {year}.")
        continue

    # ---------------------------------------------------------
    # B. Apply Novy-Marx Logic (Rank Sum)
    # ---------------------------------------------------------
    # Rank BM: Higher = Cheaper (Better)
    # method='first' breaks ties by row order, so every stock gets a distinct rank.
    rank_bm = subset['bm'].rank(ascending=True, method='first')

    # Rank GMA: Higher = More Profitable (Better)
    rank_gma = subset['gma'].rank(ascending=True, method='first')

    # Combine. A stock missing either signal gets a NaN rank and hence a NaN score.
    subset['combined_score'] = rank_bm + rank_gma

    # Only stocks with a defined score may enter a portfolio.
    scored = subset.dropna(subset=['combined_score'])

    # ---------------------------------------------------------
    # C. Select Top N_PER_LEG (Buys) and Bottom N_PER_LEG (Sells)
    # ---------------------------------------------------------
    # Cap the leg size at half the rankable universe so that no stock can land
    # in both the buy list and the sell list.
    n_per_leg = min(N_PER_LEG, len(scored) // 2)
    if n_per_leg == 0:
        print(f"⚠️ Warning: No rankable stocks for {target_date.date()}. Skipping {year}.")
        continue
    if n_per_leg < N_PER_LEG:
        print(f"⚠️ Warning: only {len(scored)} rankable stocks on {target_date.date()}; using {n_per_leg} per leg for {year}.")

    # Buys: Highest combined score
    buys = scored.nlargest(n_per_leg, 'combined_score')[OUTPUT_COLS].copy()

    # Sells: Lowest combined score
    sells = scored.nsmallest(n_per_leg, 'combined_score')[OUTPUT_COLS].copy()

    # ---------------------------------------------------------
    # D. Save to CSV (written to the current working directory)
    # ---------------------------------------------------------
    buy_filename = f'novy_marx_buys_{year}.csv'
    sell_filename = f'novy_marx_sells_{year}.csv'

    buys.to_csv(buy_filename, index=False)
    sells.to_csv(sell_filename, index=False)

    print(f"✅ Generated {year}: {buy_filename} ({len(buys)} stocks) & {sell_filename} ({len(sells)} stocks) using data from {target_date.date()}")

print("\nProcessing Complete.")

Processing Novy-Marx portfolios for: [2020, 2021, 2022, 2023, 2024]

✅ Generated 2020: novy_marx_buys_2020.csv (150 stocks) & novy_marx_sells_2020.csv (150 stocks) using data from 2019-12-31
✅ Generated 2021: novy_marx_buys_2021.csv (150 stocks) & novy_marx_sells_2021.csv (150 stocks) using data from 2020-12-31
✅ Generated 2022: novy_marx_buys_2022.csv (150 stocks) & novy_marx_sells_2022.csv (150 stocks) using data from 2021-12-31
✅ Generated 2023: novy_marx_buys_2023.csv (150 stocks) & novy_marx_sells_2023.csv (150 stocks) using data from 2022-12-31
✅ Generated 2024: novy_marx_buys_2024.csv (150 stocks) & novy_marx_sells_2024.csv (150 stocks) using data from 2023-12-31

Processing Complete.


In [21]:
# Inspect the buy list left over from the final loop iteration of the previous
# cell (target year 2024, built from the 2023-12-31 cross-section).
buys

Unnamed: 0,datadate,permno,ticker,comnam,ret_fwd_1
76005,2023-12-31,22976,WBD,WARNER BROS DISCOVERY INC,-0.124208
262560,2023-12-31,90720,BLDR,BUILDERS FIRSTSOURCE INC,0.035973
185749,2023-12-31,61241,AMD,ADVANCED MICRO DEVICES INC,0.132875
222507,2023-12-31,77661,DHI,D R HORTON INC,-0.064379
115912,2023-12-31,34817,NUE,NUCOR CORP,0.069363
...,...,...,...,...,...
264096,2023-12-31,91103,UAL,UNITED AIRLINES HOLDINGS INC,-0.001792
41532,2023-12-31,16851,DD,DUPONT DE NEMOURS INC,-0.201372
215373,2023-12-31,76226,PARA,PARAMOUNT GLOBAL,-0.018223
240101,2023-12-31,84373,DGX,QUEST DIAGNOSTICS INC,-0.068089
