In [None]:
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math as m

# Print the installed pandas and NumPy versions for reference.
print('Pandas version', pd.__version__)
print('Numpy version', np.__version__)

In [None]:
# Notebook-wide display configuration.

# Render floats in DataFrame output with thousands separators and two decimals.
pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.precision', 2)

# Use the fully qualified option names: in newer pandas the bare patterns
# 'max_rows' / 'max_columns' also match the 'styler.render.*' options, and
# set_option raises OptionError when a pattern matches more than one key.
pd.set_option('display.max_rows', 250)
pd.set_option('display.max_columns', 250)

# matplotlib 3.6 renamed the bundled 'seaborn' style sheet to 'seaborn-v0_8';
# use whichever name this installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [None]:
#import numba
#@numba.jit
def Auto_Loan(Loans, Rates, Periods):
    """Return the fixed monthly payment of a fully amortising loan.

    Parameters
    ----------
    Loans : number or array-like
        Amount borrowed (present value).
    Rates : number or array-like
        Annual interest rate in percent (e.g. ``3.19`` for 3.19 %).
    Periods : number or array-like
        Number of monthly payments; expected to be positive.

    Returns
    -------
    float or numpy.ndarray
        A Python float when every input is a scalar, otherwise an array with
        the broadcast shape of the inputs.
    """
    # Convert the annual percentage rate into a monthly decimal rate.
    monthly_rate = np.asarray(Rates, dtype=float) / 100 / 12
    principal = np.asarray(Loans, dtype=float)
    n_payments = np.asarray(Periods, dtype=float)

    # `**` is used instead of math.pow: it broadcasts over arrays and does not
    # depend on the module-level alias `m`, which a later cell rebinds to the
    # regression slope.
    with np.errstate(divide='ignore', invalid='ignore'):
        amortised = monthly_rate * principal / (1 - (1 + monthly_rate) ** -n_payments)

    # At a 0 % rate the annuity formula degenerates to 0/0; the payment is then
    # simply the principal spread evenly over the term.
    payment = np.where(monthly_rate == 0, principal / n_payments, amortised)

    # Keep the scalar-in / float-out contract of the original function.
    return payment.item() if payment.ndim == 0 else payment

In [None]:
%time
Auto_Loan(25515, 3.19, 72)

In [None]:
%time
np.pmt(rate=(3.19/100/12), nper=72, pv=-25515)

In [None]:
# Print both results to check that the hand-written formula agrees with
# NumPy's payment helper for the same loan (25,515 at 3.19 % over 72 months).
print(Auto_Loan(25515, 3.19, 72))

# NOTE(review): np.pmt was deprecated in NumPy 1.18 and removed in 1.20 (moved
# to the numpy-financial package) - confirm the installed NumPy still has it.
print(np.pmt(rate=(3.19/100/12), nper=72, pv=-25515))

In [None]:
# State codes used later when sampling the synthetic loans.
states = np.array(['NJ','NY','PA'])
states

In [None]:
# Sampling probability for each entry of `states` (same order); sums to 1.
ratios = np.array([.1, .85, .05])
ratios

In [None]:
# np.r_ concatenates the slices and scalars into one 1-D array:
# 22000..30000, 41000, 45000, 51000..60000 (slice ends are exclusive) and 105000.
Cars = np.r_[22000:30001,41000,45000,51000:60001,105000]
type(Cars)

In [None]:
# Total number of price points.
Cars.size

In [None]:
# 30000 is the last value of the first slice, so it is present.
30000 in Cars

In [None]:
# 30001 is the exclusive end of the first slice, so it is absent.
30001 in Cars

In [None]:
# Seed the legacy global RNG so that Restart & Run All reproduces the same draw.
np.random.seed(42)

# 15 random integer prices from the half-open range [22000, 40000).
Cars2 = np.random.randint(22000, 40000, 15)
Cars2

In [None]:
# Keep only the entries of Cars whose value also occurs in Cars2.
Cars[np.isin(Cars, Cars2)]

In [None]:
# Plot the price values against their position in the array.
sns.lineplot(x=np.arange(0,Cars.size), y=Cars);

In [None]:
np.median(Cars)

In [None]:
np.std(Cars)

In [None]:
# Count business days from 2019-01-01 up to (not including) 2020-01-01,
# using NumPy's default Monday-Friday week mask and no holidays.
np.busday_count('2019-01-01', '2020-01-01')

In [None]:
# Rough cross-check: days in the year minus 52 weekends of two days each.
365 - (52*2)

In [None]:
# Fixed-date holidays for every year from 1970 through 2030, as ISO date strings:
# New Year's Day, the 4th of July and Christmas Day.
nyd = list(map(lambda year: str(year) + '-01-01', range(1970, 2031)))
july4 = list(map(lambda year: str(year) + '-07-04', range(1970, 2031)))
xmas = list(map(lambda year: str(year) + '-12-25', range(1970, 2031)))

# Show the last seven entries of each list (2024-2030), one list per line.
print(nyd[-7:], july4[-7:], xmas[-7:], sep='\n')

In [None]:
# Same 2019 business-day count, now passing the three fixed-date holiday lists
# (concatenated) so those dates are excluded as well.
np.busday_count('2019-01-01', '2020-01-01', holidays=nyd + july4 + xmas)

In [None]:
# Number of synthetic loans to generate.
N = 3000

# Seed the legacy global RNG so re-running this cell (or the whole notebook)
# regenerates exactly the same synthetic loan book.
np.random.seed(42)

# Origination dates sampled from 2019-11-26 up to (not including) 2020-07-01.
Start_Dates = np.random.choice(np.arange('2019-11-26', '2020-07-01', dtype='datetime64[D]'), replace=True, size=N)
# State of each loan, sampled with the probabilities in `ratios`.
States = np.random.choice(states, size=(N), p=ratios)
# Loan amounts: normal around the median car price (std 1000), cast to integers.
Loans = np.random.normal(loc=np.median(Cars), scale=1000, size=N).astype(int)
# Annual rate in percent: one of six evenly spaced values between 4.6 and 5.2.
Rates = np.random.choice(np.linspace(start=4.6, stop=5.2, num=6, dtype=np.float32), replace=True, size=N)
# Term in months: 12, 24, ..., 72.
Periods = np.random.choice(np.arange(12, 84, 12), replace=True, size=N)
# Approximate maturity date, treating every month as 30 days.
End_Dates = Start_Dates + (Periods * 30)
# Business days between origination and the approximate maturity date.
Bus_Days = np.busday_count(Start_Dates, End_Dates)
# The comparison already yields a boolean array, so no np.where wrapper is needed.
Luxury = Loans >= 50000

In [None]:
#start = time.perf_counter_ns()

# Monthly payment for every simulated loan, using the closed-form annuity
# formula  payment = r * L / (1 - (1 + r) ** -n)  with r the monthly rate.
# This is what np.pmt(rate=r, nper=n, pv=-L) computed; np.pmt itself was
# deprecated in NumPy 1.18 and removed in 1.20, so plain array arithmetic is
# used to keep the notebook running on current NumPy. All simulated rates are
# non-zero, so the denominator cannot vanish.
Payments = (
    (Rates / 100 / 12) * Loans
    / (1 - (1 + Rates / 100 / 12) ** -Periods)
)

#print("Processed in {:,} nanoseconds".format(time.perf_counter_ns()-start))

In [None]:
# Allocate an (uninitialised) structured array with one record per loan.
# Each (field name, format) pair describes one column of the record.
auto_np = np.empty(
    N,
    dtype=[
        ('states', 'U2'),
        ('loans', 'f8'),
        ('rates', 'f8'),
        ('periods', 'i8'),
        ('payments', 'f8'),
        ('start_dates', 'datetime64[D]'),
        ('end_dates', 'datetime64[D]'),
        ('bus_days', 'i8'),
        ('luxury', 'bool'),
    ],
)

In [None]:
# Copy each simulated column into the matching field of the structured array.
auto_np['states'] = States
auto_np['loans'] = Loans
auto_np['rates'] = Rates
auto_np['periods'] = Periods
auto_np['payments'] = Payments
auto_np['start_dates'] = Start_Dates
auto_np['end_dates'] = End_Dates
auto_np['bus_days'] = Bus_Days
auto_np['luxury'] = Luxury

# Number of records.
auto_np.size

In [None]:
# Sum of all monthly payments, formatted as whole dollars.
print('${:,.0f}'.format(auto_np['payments'].sum()))

In [None]:
# Average monthly payment.
print('${:,.2f}'.format(auto_np['payments'].mean()))

In [None]:
# Largest monthly payment.
print('${:,.2f}'.format(auto_np['payments'].max()))

In [None]:
# First record, showing all fields.
auto_np[0]

In [None]:
# Limit NumPy's printed float precision to two decimals from here on.
np.set_printoptions(precision=2)

In [None]:
# First five records.
print(auto_np[0:5])

In [None]:
# Last five records.
print(auto_np[-5:])

In [None]:
# First five records among the 12-month loans.
print(auto_np[auto_np['periods']==12][0:5])

In [None]:
# Loan amounts of records whose monthly payment exceeds 600.
auto_np[auto_np['payments'] > 600]['loans']

In [None]:
# Loan amount(s) of the record(s) with the smallest monthly payment.
auto_np[auto_np['payments'] == auto_np['payments'].min()]['loans']

In [None]:
# Sanity check: any negative loan amounts?
auto_np[auto_np['loans'] < 0]['loans']

In [None]:
# Distinct state codes present in the data.
np.unique(auto_np['states'])

In [None]:
# Distribution of loan amounts.
# NOTE(review): seaborn has deprecated distplot in favour of histplot/displot -
# confirm against the installed seaborn version before relying on it.
sns.distplot(auto_np['loans']);

In [None]:
# Distribution of monthly payments.
sns.distplot(auto_np['payments']);

In [None]:
# Monthly payment spread for each loan term.
sns.boxplot(x=auto_np['periods'], y=auto_np['payments']);

In [None]:
# Count of luxury vs non-luxury loans.
sns.countplot(x=auto_np['luxury'], alpha=.8);

In [None]:
# Monthly payment per state.
sns.barplot(x=auto_np['states'], y=auto_np['payments']);

In [None]:
# First ten monthly payments.
auto_np['payments'][0:10]

In [None]:
# Disabled: would write the payments column to a ';'-separated text file.
#auto_np['payments'].tofile(file='auto_payments.csv', format='%.0f', sep=';')

In [None]:
# Disabled: would read that file back and show its first ten values.
#auto_payments = np.fromfile(file='auto_payments.csv', sep=';')
#auto_payments[0:10]

In [None]:
# Boolean masks: loans strictly above 50,000 and monthly payments strictly below 1,000.
price_test = 50000 < auto_np['loans']
payment_test = 1000 > auto_np['payments']

# Element-wise AND of the two masks.
auto_bool = price_test & payment_test

# Number of records satisfying both conditions.
auto_np[auto_bool].size

In [None]:
# Convert the filtered structured array into a DataFrame (one column per field).
df = pd.DataFrame.from_records(auto_np[auto_bool])
df.info()

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
# The 20 rows with the lowest monthly payment.
df.nsmallest(n=20, columns='payments')

In [None]:
cars = sns.load_dataset('mpg')
cars.head()

In [None]:
cars.isna().sum()

In [None]:
cars.corr()

In [None]:
cars.dropna(subset=['horsepower'], inplace=True)

In [None]:
# Predictor (horsepower) and response (mpg) as plain NumPy arrays.
X = cars.horsepower.values
Y = cars.mpg.values

# Scatter plot with a fitted regression line. x/y are passed by keyword:
# seaborn >= 0.12 rejects them as positional arguments.
sns.regplot(x=X, y=Y);

In [None]:
# numpy polyfit

# Creates a linear regression from the data points
# (deg=1 returns two coefficients: slope first, then intercept).
# NOTE(review): this rebinds the module-level name `m`, which the imports cell
# bound to the math module (`import math as m`); any code that still expects
# `m` to be `math` will break once this cell has run.
m, b = np.polyfit(x=X, y=Y, deg=1)

print("m={:.02f},  b={:.02f}".format(m, b))

In [None]:
# This is a simple y = mx + b line function
def f(x):
    """Evaluate the fitted line at ``x`` using the module-level slope ``m`` and intercept ``b``.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    return m*x + b

# This generates the same scatter plot as before, but adds a line plot using the function above.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
sns.scatterplot(x=X, y=Y, alpha=.4)
sns.lineplot(x=X, y=f(X), color='grey');

In [None]:
# Same degree-1 fit, keeping the coefficient array whole
# (highest power first: slope, then intercept) for use with np.polyval.
fit = np.polyfit(x=X, y=Y, deg=1)

fit

In [None]:
# Value of the fitted line at x = 75.
np.polyval(fit,75)

In [None]:
# x value at which to highlight the fitted line's prediction.
xval = 100

# Data points, the highlighted prediction in red, and the fitted line in grey.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
sns.scatterplot(x=X, y=Y, alpha=.4)
sns.scatterplot(x=[xval], y=[np.polyval(fit,xval)], color='r')
sns.lineplot(x=X, y=f(X), color='grey');

In [None]:
from ipywidgets import interact

@interact(xval=150)
def plot(xval):
    """Redraw the scatter plot, fitted line and the prediction at ``xval``.

    ``interact`` builds a widget for ``xval`` (initial value 150) and calls
    this function again whenever the widget value changes.
    """
    sns.scatterplot(x=X, y=Y, alpha=.4)
    sns.scatterplot(x=[xval], y=[np.polyval(fit,xval)], color='r')
    # x/y by keyword: seaborn >= 0.12 rejects them as positional arguments.
    sns.lineplot(x=X, y=f(X), color='grey');
