# Investments

This will be where we get all the data for our investments.

We will then run that data through our ETL pipeline until it is ready to be loaded into SQL Server for analysis

Brief explanation of pipeline:

1. Start by picking our stocks, etf, and forex, as well as an index that can look at the overall market

2. Extract the data by requesting it from polygon.io

3. Clean data (removing nulls etc)

4. Aggregate and add metrics that will be important to track

5. Format data into star schema

6. Load into SQL Server for analysis


In [88]:
# Picking our stocks, ETF, and forex
stock1 = {"ticker": "NVDA", "name": "Nvidia"}
stock2 = {"ticker": "AMZN", "name": "Amazon"}
stock3 = {"ticker": "JPM", "name": "JPMorgan"}
etf = {"ticker": "SPY", "name": "SPY"}
forex = {"ticker": "C:USDCAD", "name": "USDCAD"}

# Our way to look at the overall market
market = {"ticker": "I:NDX", "name": "NDX"}

# List for easy access
investments = [stock1, stock2, stock3, etf, forex, market]

In [89]:
import requests
import os
import csv
import pandas as pd
import numpy as np
from dotenv import load_dotenv

In [90]:
load_dotenv()
api_key = os.environ["api_key"]
params = {"apiKey": api_key}
base_url = "https://api.polygon.io"

In [91]:
# # Creating a request to get information on each investment every day from 1/1/23 to 7/1/24

# for investment in investments:
#     response = requests.get(
#         f"{base_url}/v2/aggs/ticker/{investment["ticker"]}/range/1/day/2023-01-01/2024-07-01?adjusted=true&sort=asc",
#         params,
#     )
#     investment["data"] = response.json()["results"]
#     print(investment)

# # # Saving our data to a csv file so we don't have to keep requesting it
# for investment in investments:
#     keys = investment["data"][0].keys()
#     with open(f"./Data/Uncleaned/{investment["name"]}.csv", "w", newline="") as output_file:
#         dict_writer = csv.DictWriter(output_file, keys)
#         dict_writer.writeheader()
#         dict_writer.writerows(investment["data"])

In [92]:
# Creating our Pandas dataframes from our saved csv file
for investment in investments:
    investment["df"] = pd.read_csv(f"./Data/Uncleaned/{investment["name"]}.csv")
    print(f"name: {investment["name"]} length: {len(investment["df"])}")

name: Nvidia length: 375
name: Amazon length: 375
name: JPMorgan length: 375
name: SPY length: 375
name: USDCAD length: 491
name: NDX length: 342


## Now that we have all the data in a pandas dataframe we can start with data cleaning

In [93]:
df_list = [stock1["df"], stock2["df"], stock3["df"], etf["df"], forex["df"], market["df"]]

# Check for null values
for df in df_list:
    print(f"Number of missing values: {df.isnull().values.sum()}")

Number of missing values: 0
Number of missing values: 0
Number of missing values: 0
Number of missing values: 0
Number of missing values: 0
Number of missing values: 0


In [94]:
# Remove duplicate rows if any
for df in df_list:
    df.drop_duplicates(inplace=True)

In [95]:
# Renaming columns to something we can understand

# Dictionary to understand what the columns will mean
keys = {
    "v": "volume",
    "vw": "volume_weighted_average",
    "o": "open_price",
    "c": "close_price",
    "h": "highest_price",
    "l": "lowest_price",
    "t": "timestamp",
    "n": "num_transactions",
}

for df in df_list:
    df.rename(columns=keys, inplace=True)

# Drop columns that we probably won't use and change volume dtype to int
for df in df_list:
    if "volume" in df:
        df["volume"] = df["volume"].astype(int)
    if "num_transactions" in df:
        df.drop(columns=["num_transactions"], inplace=True)


stock1["df"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 375 entries, 0 to 374
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   volume                   375 non-null    int64  
 1   volume_weighted_average  375 non-null    float64
 2   open_price               375 non-null    float64
 3   close_price              375 non-null    float64
 4   highest_price            375 non-null    float64
 5   lowest_price             375 non-null    float64
 6   timestamp                375 non-null    int64  
dtypes: float64(5), int64(2)
memory usage: 20.6 KB


In [96]:
# Converting the timestamp into a readable date
for df in df_list:
    df["date"] = pd.to_datetime(df["timestamp"], unit="ms").apply(
        lambda dt: dt.replace(hour=0, minute=0, second=0)
    )
    df.drop(columns=["timestamp"], inplace=True)
stock1["df"]

Unnamed: 0,volume,volume_weighted_average,open_price,close_price,highest_price,lowest_price,date
0,401276580,14.3681,14.851,14.315,14.996,14.096,2023-01-03
1,431323600,14.6152,14.567,14.749,14.853,14.241,2023-01-04
2,389168110,14.3140,14.491,14.265,14.564,14.148,2023-01-05
3,405043620,14.5982,14.474,14.859,15.010,14.034,2023-01-06
4,504180560,15.7877,15.284,15.628,16.056,15.141,2023-01-09
...,...,...,...,...,...,...,...
370,411392492,123.5418,121.200,126.090,126.500,119.320,2024-06-25
371,355252504,125.0539,126.130,126.400,128.120,122.600,2024-06-26
372,247638444,124.3026,124.100,123.990,126.410,122.920,2024-06-27
373,309719295,124.8037,124.580,123.540,127.710,122.750,2024-06-28


### There are missing days in the data

In [97]:
# Filling in the missing data.

# Creating a date range for the year of 2023
# date_range = pd.date_range(
#     start="2023-01-01 05:00:00", end="2023-12-31 05:00:00", freq="D"
# )

# Temporary set the date as the index
for df in df_list:
    df.set_index("date", inplace=True)

# Reindex our df based on the date_range we set up and fill in the missing data using the "nearest" method
# stock1["df"] = stock1["df"].reindex(date_range, method="nearest")
# stock2["df"] = stock2["df"].reindex(date_range, method="nearest")
# stock3["df"] = stock3["df"].reindex(date_range, method="nearest")
# etf["df"] = etf["df"].reindex(date_range, method="nearest")
# forex["df"] = forex["df"].reindex(date_range, method="nearest")

#  Reset our index back to normal
# stock1["df"].reset_index(names="date", inplace=True)
# stock2["df"].reset_index(names="date", inplace=True)
# stock3["df"].reset_index(names="date", inplace=True)
# etf["df"].reset_index(names="date", inplace=True)
# forex["df"].reset_index(names="date", inplace=True)

# df_list = [stock1["df"], stock2["df"], stock3["df"], etf["df"], forex["df"], market["df"]]
# stock1["df"]

## Now that our data is clean and complete we can start adding in other features we want to track

In [98]:
# Create a column for the 10 and 100 day moving averages
for df in df_list:
    df["10_day_ma"] = df["close_price"].rolling(window="10D",).mean()
    df["100_day_ma"] = df["close_price"].rolling(window="100D").mean()

In [99]:
# Create a column that will track our daily return by finding the difference between the previous day's closing price
for df in df_list:
    df["daily_return"] = df["close_price"].pct_change()
    df.fillna(0, inplace=True)
    df["cumulative_return"] = (1 + df["daily_return"]).cumprod() - 1

In [100]:
# Calculating Volatility over a 30 days window
for df in df_list:
    df["volatility"] = df["daily_return"].rolling(window="30D").std() * (len(df)**0.5)

In [101]:
# Calculating Sharpe Ratio
for df in df_list:
    avg_return = df["daily_return"].mean()
    excess_return = avg_return - ((1 + 0.03) ** (1 / len(df)) - 1)
    sharpe_ratio = excess_return / df["volatility"]
    df["sharpe_ratio"] = sharpe_ratio
    df.fillna(0, inplace=True)

In [102]:
# Reset the index so it doesn't use the date anymore
for df in df_list:
    df.reset_index(names="date", inplace=True)

In [103]:
# Calculating Beta
for df in df_list:
    merge_df = pd.merge(
        df[["date", "daily_return"]],
        market["df"][["date", "daily_return"]],
        on="date",
        suffixes=("_stock", "_market"),
    )
    # Calculate covariance matrix
    covariance_matrix = np.cov(
        merge_df["daily_return_stock"], merge_df["daily_return_market"]
    )

    # Extract covariance of the asset with the market
    covariance_stock_market = covariance_matrix[0, 1]

    # Calculate variance of the market
    variance_market = np.var(merge_df["daily_return_market"])

    df["beta"] = covariance_stock_market / variance_market

## Checking our data

In [104]:
print(stock1["name"])
stock1["df"]

Nvidia


Unnamed: 0,date,volume,volume_weighted_average,open_price,close_price,highest_price,lowest_price,10_day_ma,100_day_ma,daily_return,cumulative_return,volatility,sharpe_ratio,beta
0,2023-01-03,401276580,14.3681,14.851,14.315,14.996,14.096,14.315000,14.315000,0.000000,0.000000,0.000000,0.000000,1.968429
1,2023-01-04,431323600,14.6152,14.567,14.749,14.853,14.241,14.532000,14.532000,0.030318,0.030318,0.415144,0.014866,1.968429
2,2023-01-05,389168110,14.3140,14.491,14.265,14.564,14.148,14.443000,14.443000,-0.032816,-0.003493,0.611448,0.010093,1.968429
3,2023-01-06,405043620,14.5982,14.474,14.859,15.010,14.034,14.547000,14.547000,0.041640,0.038002,0.646813,0.009541,1.968429
4,2023-01-09,504180560,15.7877,15.284,15.628,16.056,15.141,14.763200,14.763200,0.051753,0.091722,0.667736,0.009242,1.968429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
370,2024-06-25,411392492,123.5418,121.200,126.090,126.500,119.320,128.018333,98.807406,0.067564,7.808243,0.711071,0.008679,1.968429
371,2024-06-26,355252504,125.0539,126.130,126.400,128.120,122.600,127.787143,99.357333,0.002459,7.829899,0.693640,0.008897,1.968429
372,2024-06-27,247638444,124.3026,124.100,123.990,126.410,122.920,126.788571,99.858667,-0.019066,7.661544,0.647230,0.009535,1.968429
373,2024-06-28,309719295,124.8037,124.580,123.540,127.710,122.750,125.068571,100.339362,-0.003629,7.630108,0.647931,0.009525,1.968429


In [105]:
print(stock2["name"])
stock2["df"]

Amazon


Unnamed: 0,date,volume,volume_weighted_average,open_price,close_price,highest_price,lowest_price,10_day_ma,100_day_ma,daily_return,cumulative_return,volatility,sharpe_ratio,beta
0,2023-01-03,76706040,85.5452,85.460,85.82,86.9600,84.205,85.820000,85.820000,0.000000,0.000000,0.000000,0.000000,1.195712
1,2023-01-04,68885123,85.1301,86.550,85.14,86.9800,83.360,85.480000,85.480000,-0.007924,-0.007924,0.108498,0.021434,1.195712
2,2023-01-05,67930825,83.7788,85.330,83.12,85.4200,83.070,84.693333,84.693333,-0.023726,-0.031461,0.233906,0.009942,1.195712
3,2023-01-06,83303361,84.4954,83.030,86.08,86.4000,81.430,85.040000,85.040000,0.035611,0.003030,0.486045,0.004785,1.195712
4,2023-01-09,65266056,88.3985,87.460,87.36,89.4800,87.080,85.504000,85.504000,0.014870,0.017945,0.437753,0.005312,1.195712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
370,2024-06-25,44586537,186.9700,186.810,186.34,188.8400,185.420,185.660000,182.497826,0.004149,1.171289,0.229903,0.010115,1.195712
371,2024-06-26,61298487,192.1127,186.920,193.61,194.8000,186.260,186.795714,182.775072,0.039015,1.256001,0.274267,0.008479,1.195712
372,2024-06-27,73974152,197.3166,195.005,197.85,199.8400,194.200,188.765714,183.093188,0.021900,1.305407,0.284817,0.008165,1.195712
373,2024-06-28,71149090,194.7864,197.730,193.25,198.8500,192.500,190.257143,183.312029,-0.023250,1.251806,0.306942,0.007577,1.195712


In [106]:
print(stock3["name"])
stock3["df"]

JPMorgan


Unnamed: 0,date,volume,volume_weighted_average,open_price,close_price,highest_price,lowest_price,10_day_ma,100_day_ma,daily_return,cumulative_return,volatility,sharpe_ratio,beta
0,2023-01-03,11054778,135.1124,135.240,135.12,136.74,133.8900,135.120000,135.120000,0.000000,0.000000,0.000000,0.000000,0.342213
1,2023-01-04,11687643,136.4908,135.990,136.38,137.68,135.5700,135.750000,135.750000,0.009325,0.009325,0.127688,0.008771,0.342213
2,2023-01-05,8381265,134.9880,135.660,135.35,135.71,133.7004,135.616667,135.616667,-0.007552,0.001702,0.163716,0.006841,0.342213
3,2023-01-06,10029076,137.3405,136.125,137.94,138.38,134.4900,136.197500,136.197500,0.019136,0.020870,0.223852,0.005003,0.342213
4,2023-01-09,8482297,137.7288,138.600,137.37,138.88,136.8800,136.432000,136.432000,-0.004132,0.016652,0.210124,0.005330,0.342213
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
370,2024-06-25,6529272,198.5962,198.090,198.07,200.07,197.7400,197.316667,195.457101,-0.004073,0.465882,0.221767,0.005050,0.342213
371,2024-06-26,7351224,197.2270,197.450,197.43,197.94,196.2750,197.332857,195.526232,-0.003231,0.461146,0.216438,0.005174,0.342213
372,2024-06-27,7448593,198.9616,197.440,199.17,199.86,196.9000,197.931429,195.604203,0.008813,0.474023,0.218693,0.005121,0.342213
373,2024-06-28,14155246,201.8693,200.010,202.26,202.60,199.3018,198.682857,195.690145,0.015514,0.496892,0.225817,0.004959,0.342213


In [107]:
print(etf["name"])
etf["df"]

SPY


Unnamed: 0,date,volume,volume_weighted_average,open_price,close_price,highest_price,lowest_price,10_day_ma,100_day_ma,daily_return,cumulative_return,volatility,sharpe_ratio,beta
0,2023-01-03,74850731,380.9576,384.37,380.82,386.43,377.8310,380.820000,380.820000,0.000000,0.000000,0.000000,0.000000,0.656775
1,2023-01-04,85934098,383.1494,383.18,383.76,385.88,380.0000,382.290000,382.290000,0.007720,0.007720,0.105713,0.008605,0.656775
2,2023-01-05,76275354,380.2625,381.72,379.38,381.84,378.7600,381.320000,381.320000,-0.011413,-0.003781,0.186407,0.004880,0.656775
3,2023-01-06,104052662,385.2463,382.61,388.08,389.25,379.4127,383.010000,383.010000,0.022932,0.019064,0.279109,0.003259,0.656775
4,2023-01-09,73978071,390.3628,390.37,387.86,393.70,387.6700,383.980000,383.980000,-0.000567,0.018486,0.246160,0.003695,0.656775
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
370,2024-06-25,38253579,543.9439,543.99,544.83,545.20,542.4400,545.778333,521.951159,0.003851,0.430676,0.098326,0.009251,0.656775
371,2024-06-26,38408871,544.7822,543.69,545.51,546.24,543.0300,545.740000,522.424348,0.001248,0.432462,0.095840,0.009491,0.656775
372,2024-06-27,34948595,545.7994,545.37,546.37,546.96,544.6100,545.635714,522.868696,0.001577,0.434720,0.095785,0.009496,0.656775
373,2024-06-28,74580583,545.9113,547.16,544.22,550.28,542.9500,545.025714,523.212754,-0.003935,0.429074,0.091473,0.009944,0.656775


In [108]:
print(forex["name"])
forex["df"]

USDCAD


Unnamed: 0,date,volume,volume_weighted_average,open_price,close_price,highest_price,lowest_price,10_day_ma,100_day_ma,daily_return,cumulative_return,volatility,sharpe_ratio,beta
0,2023-01-01,102,1.3530,1.35247,1.35281,1.35476,1.35167,1.352810,1.352810,0.000000,0.000000,0.000000,0.000000,-0.109905
1,2023-01-02,71904,1.3560,1.35262,1.35622,1.35900,1.33160,1.354515,1.354515,0.002521,0.002521,0.039495,-0.000542,-0.109905
2,2023-01-03,199896,1.3621,1.35631,1.36718,1.36849,1.35210,1.358737,1.358737,0.008081,0.010622,0.091622,-0.000234,-0.109905
3,2023-01-04,220723,1.3554,1.36719,1.34851,1.36741,1.34365,1.356180,1.356180,-0.013656,-0.003179,0.204616,-0.000105,-0.109905
4,2023-01-05,205787,1.3535,1.34852,1.35625,1.35954,1.34571,1.356194,1.356194,0.005740,0.002543,0.188557,-0.000113,-0.109905
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486,2024-06-26,96927,1.3682,1.36590,1.37047,1.37076,1.36480,1.369279,1.366511,0.003192,0.013054,0.053232,-0.000402,-0.109905
487,2024-06-27,96736,1.3691,1.37046,1.36980,1.37123,1.36740,1.369102,1.366656,-0.000489,0.012559,0.053053,-0.000403,-0.109905
488,2024-06-28,111975,1.3699,1.36995,1.36507,1.37340,1.36482,1.368368,1.366862,-0.003453,0.009063,0.050258,-0.000426,-0.109905
489,2024-06-30,2451,1.3671,1.36705,1.36730,1.36745,1.36598,1.367955,1.367110,0.001634,0.010711,0.047302,-0.000452,-0.109905


In [109]:
print(market["name"])
market["df"]

NDX


Unnamed: 0,date,open_price,close_price,highest_price,lowest_price,10_day_ma,100_day_ma,daily_return,cumulative_return,volatility,sharpe_ratio,beta
0,2023-02-22,12085.674327,12066.271823,12156.213862,12006.037547,12066.271823,12066.271823,0.000000,0.000000,0.000000,0.000000,1.002933
1,2023-02-24,11979.799425,11969.651998,12018.320636,11900.838503,12017.961911,12017.961911,-0.008007,-0.008007,0.104711,0.013529,1.002933
2,2023-02-27,12106.791649,12057.788354,12159.638149,12034.612553,12031.237392,12031.237392,0.007363,-0.000703,0.142169,0.009965,1.002933
3,2023-02-28,12041.746389,12042.116479,12146.523163,12021.320963,12033.957164,12033.957164,-0.001300,-0.002002,0.116513,0.012159,1.002933
4,2023-03-01,12026.719702,11937.475278,12054.476681,11906.578816,12014.660787,12014.660787,-0.008690,-0.010674,0.121593,0.011651,1.002933
...,...,...,...,...,...,...,...,...,...,...,...,...
337,2024-06-25,19555.839136,19701.128649,19712.415452,19523.506722,19740.013992,18383.875421,0.011631,0.632744,0.151749,0.009336,1.002933
338,2024-06-26,19671.068079,19751.047838,19763.545747,19654.183104,19741.590256,18409.470217,0.002534,0.636881,0.147908,0.009578,1.002933
339,2024-06-27,19740.515013,19789.027010,19850.810183,19701.336249,19725.344279,18434.931407,0.001923,0.640028,0.147869,0.009581,1.002933
340,2024-06-28,19817.004516,19682.870924,20017.714607,19665.848633,19693.060469,18455.841031,-0.005364,0.631231,0.146214,0.009689,1.002933


In [110]:
# # Saving our cleaned data to a csv file
for investment in investments:
    investment["df"].to_csv(f"./Data/Cleaned/{investment["name"]}.csv")

## Now that we have all of our stock, etf, forex, and market data ready we can put it into a star format and export to SQL Server

In [111]:
# Creating our dimension tables
investment_type = {
    "investment_type_id": [1, 2, 3, 4],
    "investment_type": ["Stock", "ETF", "Forex", "Market"],
}

investment_name = {
    "investment_name_id": [1, 2, 3, 4, 5, 6],
    "investment_name": ["Nvidia", "Amazon", "JPMorgan", "SPY", "USDCAD", "NDX"],
}

investment_sector = {
    "investment_sector_id": [1, 2, 3, 4, 5],
    "investment_sector": ["Tech", "Consumer", "Finance", "Broad Market", "Currency"],
}

investment_type_df = pd.DataFrame(investment_type)
investment_name_df = pd.DataFrame(investment_name)
investment_sector_df = pd.DataFrame(investment_sector)

In [112]:
# Creating the time dimension table

investment_time_df = pd.DataFrame(
    {"investment_date": pd.date_range(start="2023-01-01", end="2024-07-01", freq="D")}
)
investment_time_df["investment_month"] = investment_time_df["investment_date"].dt.month
investment_time_df["investment_quarter"] = investment_time_df["investment_date"].dt.quarter
investment_time_df["investment_year"] = investment_time_df["investment_date"].dt.year

investment_time_df["time_id"] = investment_time_df.index + 1
investment_time_df.set_index("time_id", inplace=True)

investment_time_df

Unnamed: 0_level_0,investment_date,investment_month,investment_quarter,investment_year
time_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2023-01-01,1,1,2023
2,2023-01-02,1,1,2023
3,2023-01-03,1,1,2023
4,2023-01-04,1,1,2023
5,2023-01-05,1,1,2023
...,...,...,...,...
544,2024-06-27,6,2,2024
545,2024-06-28,6,2,2024
546,2024-06-29,6,2,2024
547,2024-06-30,6,2,2024


In [113]:
# Adding in the relationships to our dimensions
for i in investments:
    if i["ticker"] == "NVDA":
        i["df"]["investment_type"] = 1
        i["df"]["investment_name"] = 1
        i["df"]["investment_sector"] = 1
    elif i["ticker"] == "AMZN":
        i["df"]["investment_type"] = 1
        i["df"]["investment_name"] = 2
        i["df"]["investment_sector"] = 2
    elif i["ticker"] == "JPM":
        i["df"]["investment_type"] = 1
        i["df"]["investment_name"] = 3
        i["df"]["investment_sector"] = 3
    elif i["ticker"] == "SPY":
        i["df"]["investment_type"] = 2
        i["df"]["investment_name"] = 4
        i["df"]["investment_sector"] = 4
    elif i["ticker"] == "C:USDCAD":
        i["df"]["investment_type"] = 3
        i["df"]["investment_name"] = 5
        i["df"]["investment_sector"] = 5
    elif i["ticker"] == "I:NDX":
        i["df"]["investment_type"] = 4
        i["df"]["investment_name"] = 6
        i["df"]["investment_sector"] = 4

In [123]:
# Combine all of our dataframes into 1 fact table
fact_df = pd.concat([df for df in df_list])
fact_df.reset_index(drop=True, inplace=True)
fact_df.insert(0, "investment_id", range(1, 1 + len(fact_df)))

fact_df.rename(columns={"date": "investment_date"}, inplace=True)

fact_df = fact_df.merge(
    investment_time_df.reset_index()[["time_id", "investment_date"]],
    on="investment_date",
    how="left",
)

fact_df.drop(
    columns=[
        "investment_date",
        "volume",
        "volume_weighted_average",
        "open_price",
        "highest_price",
        "lowest_price",
    ],
    inplace=True,
)

# Reorering columns for neatness
fact_df = fact_df[
    [
        "investment_id",
        "investment_name",
        "investment_type",
        "investment_sector",
        "time_id",
        "close_price",
        "daily_return",
        "cumulative_return",
        "10_day_ma",
        "100_day_ma",
        "volatility",
        "sharpe_ratio",
        "beta",
    ]
]
fact_df

Unnamed: 0,investment_id,investment_name,investment_type,investment_sector,time_id,close_price,daily_return,cumulative_return,10_day_ma,100_day_ma,volatility,sharpe_ratio,beta
0,1,1,1,1,3,14.315000,0.000000,0.000000,14.315000,14.315000,0.000000,0.000000,1.968429
1,2,1,1,1,4,14.749000,0.030318,0.030318,14.532000,14.532000,0.415144,0.014866,1.968429
2,3,1,1,1,5,14.265000,-0.032816,-0.003493,14.443000,14.443000,0.611448,0.010093,1.968429
3,4,1,1,1,6,14.859000,0.041640,0.038002,14.547000,14.547000,0.646813,0.009541,1.968429
4,5,1,1,1,9,15.628000,0.051753,0.091722,14.763200,14.763200,0.667736,0.009242,1.968429
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2328,2329,6,4,4,542,19701.128649,0.011631,0.632744,19740.013992,18383.875421,0.151749,0.009336,1.002933
2329,2330,6,4,4,543,19751.047838,0.002534,0.636881,19741.590256,18409.470217,0.147908,0.009578,1.002933
2330,2331,6,4,4,544,19789.027010,0.001923,0.640028,19725.344279,18434.931407,0.147869,0.009581,1.002933
2331,2332,6,4,4,545,19682.870924,-0.005364,0.631231,19693.060469,18455.841031,0.146214,0.009689,1.002933


In [124]:
# Save our star schema files as csv
fact_df.to_csv("./Data/Star/investments.csv", index=False)
investment_type_df.to_csv("./Data/Star/investment_type.csv", index=False)
investment_name_df.to_csv("./Data/Star/investment_name.csv", index=False)
investment_sector_df.to_csv("./Data/Star/investment_sector.csv", index=False)
investment_time_df.to_csv("./Data/Star/investment_time.csv")