In [None]:
# Basics of Python
import numpy as np
import matplotlib.pyplot as plt

# Seed the random number generator
CWID = -1  # Put your Campus Wide ID number here: it personalizes your
# results while keeping them reproducible through the use of a seed.
personal = CWID % 10000  # remainder modulo 10000 (Python's % is never negative here)
np.random.seed(seed=personal)

In [None]:
# Draw 1000 uniform random numbers between -1 and 1 and plot their histogram
x = np.random.uniform(low=-1, high=1, size=1000)
plt.hist(x)
plt.show()

# Draw 1000 standard normal random numbers (mean 0, standard deviation 1)
x = np.random.normal(loc=0, scale=1, size=1000)
# Histogram with 50 bins
plt.hist(x, bins=50)
plt.show()

# Change the mean/variance: 10000 draws with mean 20 and standard deviation 5
x = np.random.normal(loc=20, scale=5, size=10000)  # loc = mean; scale = standard deviation
plt.hist(x, bins=50)
plt.show()

# Mean, standard deviation and variance of the last draw.
# Note: NumPy's std/var default to ddof=0 (they divide by n, not n-1).
print(x.mean())
print(x.std())
print(x.var())
# Do these values make sense? (The draw above used loc=20 and scale=5.)

In [None]:
# Vectors + Loops + sampling
# Draw 25 random samples (with replacement) of 500 observations each from x
mu = np.zeros(25)
for i in range(25):  # Python indexes from 0, so i runs over 0, 1, ..., 24
    resample = np.random.choice(x, size=500, replace=True)  # one sample of 500 observations
    mu[i] = resample.mean()
# Mean of the sample means and standard deviation of the sample means
print(mu.mean())
print(mu.std())

# Compare with central limit theorem
# Mean = 20
# Standard deviation = sigma/sqrt(n) = 5/sqrt(500) = 0.2236

In [None]:
# Create a vector of values from 1 to 50
# Directly:
# In Python 3, range() is a lazy sequence, so print(range(1,51)) would show
# "range(1, 51)" instead of the numbers. Convert it to a list to see the values.
x = list(range(1,51))
print(x)

# Same vector as a NumPy array (the stop value 51 is excluded)
x = np.arange(1, 51)
print(x)

# Loops:
# For loop: fill a pre-allocated array one entry at a time
x = np.zeros(50)
for i in range(50):
    x[i] = i + 1  # position i holds the value i+1 because indexing starts at 0
print(x)

# While loop: same result, but the counter is advanced by hand
x = np.zeros(50)
i = 0
while i < 50:
    x[i] = i + 1
    i += 1
print(x)


# If statements: only the first branch whose condition holds is executed
length = len(x)
if length > 100:
    print('Length > 100')
elif length > 50:
    print('Length > 50')
else:
    print('Length <= 50')

In [None]:
# Data set needed for this cell: "fakedataeasy.csv"
# (the relative path below is resolved against the current working directory)
import pandas as pd

df = pd.read_csv("fakedataeasy.csv")
print(df)
# Mean and standard deviation of each variable (column)
column_means = df.mean()
print(column_means)
column_stds = df.std()
print(column_stds)

In [None]:
# Download financial data:
from pandas_datareader.data import DataReader # within this cell, only the commented-out alternative below refers to it
import yfinance
from datetime import datetime

# auto_adjust=False is passed explicitly: recent yfinance releases adjust prices by
# default and then no longer return the "Adj Close" column selected below, which
# would make myData["Adj Close"] raise a KeyError.
# Note: yfinance documents the end date as exclusive.
myData = yfinance.download(["IBM","MSFT"],start=datetime(2018,1,1),end=datetime(2021,12,31),auto_adjust=False) #IBM and Microsoft chosen at random
#myData = DataReader(["IBM","MSFT"],"yahoo",datetime(2018,1,1),datetime(2021,12,31)) 
print(myData)

# Adjusted closing prices, one Series per ticker
IBM = myData["Adj Close"]["IBM"]
MSFT = myData["Adj Close"]["MSFT"]
# Daily log return: log(P_t) - log(P_{t-1}).
# The first entry is NaN because there is no previous price to compare with.
rIBM = np.log(IBM) - np.log(IBM.shift(1))
rMSFT = np.log(MSFT) - np.log(MSFT.shift(1))

# Create a data frame with one column of returns per stock
df = pd.DataFrame({'IBM': rIBM,'MSFT': rMSFT})
print(df)

# Find the mean and standard deviation of daily returns
# (pandas skips NaN values by default, so the leading NaN does not affect these)
print(df.mean())
print(df.std())