In [None]:
import pandas as pd

In [None]:
# Load the raw pending-positions CSV export into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# notebook only runs where this exact file exists.
positions_csv_path = "C:\\Users\\laimi\\Downloads\\pendingpositions_20240816_043004.csv"
df = pd.read_csv(filepath_or_buffer=positions_csv_path)

In [None]:
# Sanity checks on the raw data: null counts per column, summary statistics,
# and column dtypes.
print("checking for null values")
print(df.isnull().sum())

print("\ndescribing the data set to make sure statistical values are as expected")
print(df.describe())

print("\ninfo for the dataset to make sure data types are appropriate")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
df.info()

In [None]:
# Clean step: drop rows without a stop price, parse the timestamp columns and
# add the calculated columns used by the rest of the notebook.
print("Cleaning up dataset, adding calculated columns")

# Row count before cleaning, for comparison with the count printed at the end.
print(f"number of rows before clean: {len(df)}")

# .assign() returns a new frame and evaluates its keyword arguments in order,
# so each lambda sees the columns produced by the ones above it (Duration
# relies on Created/Closed already being datetimes).
df_cleaned = df.dropna(subset="StopPrice").assign(
    # Convert the timestamp columns to proper datetime types.
    Created=lambda frame: pd.to_datetime(frame["Created"]),
    Closed=lambda frame: pd.to_datetime(frame["Closed"]),
    # Share count times bid price.
    PositionSize=lambda frame: frame["NumberOfShares"] * frame["Bid"],
    # Stop price relative to the bid, as a percentage of the bid.
    StopPricePct=lambda frame: ((frame["StopPrice"] - frame["Bid"]) / frame["Bid"]) * 100,
    # Stop-to-bid price difference scaled by the share count.
    StopLossAmount=lambda frame: (frame["StopPrice"] - frame["Bid"]) * frame["NumberOfShares"],
    # Whole days between creation and close.
    Duration=lambda frame: (frame["Closed"] - frame["Created"]).dt.days,
)

print(f"number of rows after clean: {len(df_cleaned)}")

In [None]:
# Repeat the summary statistics and dtype report, this time on the cleaned frame.
print("the same summary information but now on cleaned data")

print("\ndescribing the data set to make sure statistical values are as expected")
print(df_cleaned.describe())

print("\ninfo for the dataset to make sure data types are appropriate")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
df_cleaned.info()

In [None]:
print("Splitting positions into longs and shorts")

# Positive share counts go to `longs`, negative ones to `shorts`; rows with a
# share count of exactly zero end up in neither frame.
share_count = df_cleaned["NumberOfShares"]
longs = df_cleaned.loc[share_count > 0].copy()
shorts = df_cleaned.loc[share_count < 0].copy()

# Subsets whose Purchased flag equals 1.
longs_purchased = longs.loc[longs["Purchased"].eq(1)]
shorts_purchased = shorts.loc[shorts["Purchased"].eq(1)]

# --- Longs: count, purchased share, and span of creation dates ---
total_long_positions = len(longs)
print(f"Number of longs: {total_long_positions}")
purchased_positions_longs = longs["Purchased"].sum()
purchased_percentage_longs = purchased_positions_longs / total_long_positions * 100
print(f"% of Longs purchased: {purchased_positions_longs} ({purchased_percentage_longs:.2f}%)")
long_created = longs["Created"]
num_days_longs = (long_created.max() - long_created.min()).days
print(f"Longs spanning {num_days_longs} Number of days")


# --- Shorts: same three figures ---
total_short_positions = len(shorts)
print(f"\nnumber of shorts: {total_short_positions}")
purchased_positions_shorts = shorts["Purchased"].sum()
purchased_percentage_shorts = purchased_positions_shorts / total_short_positions * 100
print(f"% of Shorts purchased: {purchased_positions_shorts} ({purchased_percentage_shorts:.2f}%)")
short_created = shorts["Created"]
num_days_short = (short_created.max() - short_created.min()).days
print(f"Shorts spanning {num_days_short} Number of days")



In [None]:
print("From this point forward, analyzing longs only\n")

# Headline statistics for the size of long positions.
print(f'Average size of positions: {longs["PositionSize"].mean():.2f}')
print(f'Max position size: {longs["PositionSize"].max():.2f}')
print(f'Min position size: {longs["PositionSize"].min():.2f}')

# 30-row rolling mean of position size. The rows are put in chronological order
# first because the result is plotted against 'Created'; a rolling window over
# rows in file order would average positions that are not adjacent in time.
# The sort is stable, so a file that is already chronological gives the same
# values as before. Assigning the Series back aligns on the index, so the row
# order of `longs` itself is left untouched.
longs['rolling_average_size'] = (
    longs.sort_values('Created', kind='mergesort')["PositionSize"]
    .rolling(window=30)
    .mean()
)

# Plotting libraries are imported in this cell rather than in the notebook's
# first import cell.
import matplotlib.pyplot as plt
import seaborn as sns

# Rolling average of position size against creation date.
plt.figure(figsize=(12, 6))
sns.lineplot(data=longs, x='Created', y='rolling_average_size')
plt.title('Rolling Average Size of Positions Over Time')
plt.xlabel('Date')
plt.ylabel('Average Size of Positions')
plt.show()

# Plot the distribution of position sizes
plt.figure(figsize=(12, 6))
sns.histplot(longs["PositionSize"], bins=30, kde=True)
plt.title('Distribution of Position Sizes')
plt.xlabel('Position Size')
plt.ylabel('Frequency')
plt.show()

# plot the distribution of purchased position sizes
plt.figure(figsize=(12, 6))
sns.histplot(longs_purchased["PositionSize"], bins=30, kde=True)
plt.title('Distribution of Position Sizes of Purchased Positions')
plt.xlabel('Position Size')
plt.ylabel('Frequency')
plt.show()

In [None]:
# Columns displayed in the ranking tables of this and the following cells.
columns_of_interest = [
    "Ticker",
    "PositionSize",
    "Created",
    "Bid",
    "StopPrice",
    "StopPricePct",
    "StopLossAmount",
    "Strategy",
    "Purchased",
    "Duration",
]

# The ten long positions with the highest PositionSize.
largest_positions = longs.nlargest(n=10, columns="PositionSize")
print("\nTop 10 Largest Positions:")
largest_positions.loc[:, columns_of_interest]

In [None]:
# List the top 10 largest position rows that were purchased.
# Exactly 10 rows are requested so the table matches the "Top 10" heading
# printed below and the other top-10 ranking cells.
largest_positions = longs_purchased.nlargest(10, ["PositionSize"])
print("\nTop 10 Largest Purchased Positions:")
largest_positions[columns_of_interest]

In [None]:
# The ten long positions with the lowest PositionSize.
smallest_positions = longs.nsmallest(n=10, columns="PositionSize")
print("\nTop 10 Smallest Positions:")
smallest_positions.loc[:, columns_of_interest]

In [None]:
# The ten purchased long positions with the lowest PositionSize.
smallest_positions = longs_purchased.nsmallest(n=10, columns="PositionSize")
print("\nTop 10 Smallest Purchased Positions:")
smallest_positions.loc[:, columns_of_interest]

In [None]:
# Duration (whole days between Created and Closed) for long positions, split
# by whether the Purchased flag is 1 or 0.
purchased_durations = longs_purchased["Duration"]
not_purchased_durations = longs.loc[longs["Purchased"] == 0, "Duration"]

avg_duration_purchased = purchased_durations.mean()
avg_duration_not_purchased = not_purchased_durations.mean()
max_duration_purchased = purchased_durations.max()

print(f"Average duration for purchased positions: {avg_duration_purchased:.2f} days")
print(f"Average duration for not purchased positions: {avg_duration_not_purchased:.2f} days")
print(f"Max duration for purchased positions: {max_duration_purchased:.2f} days")

# The 20 purchased longs with the highest Duration.
longest_duration = longs_purchased.nlargest(n=20, columns="Duration")
print("\nTop 20 Longest to Open Positions:")
longest_duration.loc[:, columns_of_interest]

In [None]:
# Mean StopPricePct (stop price relative to bid, in percent) for all longs,
# for purchased longs, and for longs whose Purchased flag is 0.
not_purchased_stop_pct = longs.loc[longs["Purchased"] == 0, "StopPricePct"]

avg_stop_loss_all = longs["StopPricePct"].mean()
avg_stop_loss_purchased = longs_purchased["StopPricePct"].mean()
avg_stop_loss_not_purchased = not_purchased_stop_pct.mean()

print(f"Average stop loss for all positions: {avg_stop_loss_all:.2f}")
print(f"Average stop loss for purchased positions: {avg_stop_loss_purchased:.2f}")
print(f"Average stop loss for not purchased positions: {avg_stop_loss_not_purchased:.2f}")

In [None]:
print("Analyzing stop loss amount, this looks very inconsistent to me")

# Headline statistics for the stop loss amount of long positions.
print(f'Average size of stop loss amount for longs: {longs["StopLossAmount"].mean():.2f}')
print(f'Max stop loss amount: {longs["StopLossAmount"].max():.2f}')
print(f'Min stop loss amount: {longs["StopLossAmount"].min():.2f}')

# 30-row rolling mean of the stop loss amount. The rows are put in
# chronological order first because the result is plotted against 'Created'; a
# rolling window over rows in file order would average positions that are not
# adjacent in time. The sort is stable, so a file that is already chronological
# gives the same values as before. Assigning the Series back aligns on the
# index, so the row order of `longs` itself is left untouched.
# NOTE(review): 'rolling_average_size' is the same column the position-size
# cell writes, so running this cell overwrites that column with stop-loss data.
longs['rolling_average_size'] = (
    longs.sort_values('Created', kind='mergesort')["StopLossAmount"]
    .rolling(window=30)
    .mean()
)

# Plotting libraries are re-imported here; the position-size cell imports the
# same two modules.
import matplotlib.pyplot as plt
import seaborn as sns

# Rolling average of the stop loss amount against creation date.
plt.figure(figsize=(12, 6))
sns.lineplot(data=longs, x='Created', y='rolling_average_size')
plt.title('Rolling Average Size of Stop Loss Amount Over Time')
plt.xlabel('Date')
plt.ylabel('Average Stop Loss Amount')
plt.show()

# Plot the distribution of stop loss amounts across all longs
plt.figure(figsize=(12, 6))
sns.histplot(longs["StopLossAmount"], bins=30, kde=True)
plt.title('Distribution of Stop Loss Amounts')
plt.xlabel('Stop Loss Amount')
plt.ylabel('Frequency')
plt.show()

# Plot the distribution of stop loss amounts for purchased longs only
plt.figure(figsize=(12, 6))
sns.histplot(longs_purchased["StopLossAmount"], bins=30, kde=True)
plt.title('Distribution of Stop Loss Amount of Purchased Positions')
plt.xlabel('Stop Loss Amount')
plt.ylabel('Frequency')
plt.show()

In [None]:
# The 20 purchased longs with the lowest (most negative) StopLossAmount.
# NOTE(review): presumably these are the largest potential losses, since the
# amount is (StopPrice - Bid) * NumberOfShares and a long's stop would sit
# below the bid -- confirm against the data.
largest_by_stop_loss_amount = longs_purchased.nsmallest(n=20, columns="StopLossAmount")

print("Positions sorted by stop loss amount")
largest_by_stop_loss_amount.loc[:, columns_of_interest]

In [None]:
# The 20 purchased longs with the highest StopLossAmount, returned highest
# value first.
# NOTE(review): the printed heading says "ascending"; that presumably refers to
# the size of the loss (values closest to zero first) -- confirm.
smallest_by_stop_loss_amount = longs_purchased.nlargest(n=20, columns="StopLossAmount")

print("Purchased positions sorted by stop loss amount ascending")
smallest_by_stop_loss_amount.loc[:, columns_of_interest]