In [None]:
import pandas as pd
from os import walk
import re

# Directory that combine_financials() reads the per-ticker CSVs from and that the
# later cells write the combined CSVs into.
data_dir = 'data'
# File-name suffixes of the per-ticker statement CSVs. combine_financials() opens
# '<data_dir>/<ticker>_<suffix>.csv' for every suffix it is handed.
quarterly_valuation_measures = 'quarterly_valuation_measures'
quarterly_income_statement = 'quarterly_financials'
quarterly_cash_flow = 'quarterly_cash-flow'
quarterly_balance_sheet = 'quarterly_balance-sheet'

def get_stock_tickers(dir):
	"""Return the ticker symbols that have at least one CSV file directly in `dir`.

	A file is counted when its complete name has the form
	``<TICKER>_<anything>.csv``, where ``<TICKER>`` is one or more upper-case
	ASCII letters. Names that merely start that way (for example
	``ABC_data.csv.bak``) are not counted.

	Args:
		dir: Path of the directory to scan. Sub-directories are not descended
			into. (The parameter name shadows the ``dir`` builtin; it is kept
			so existing keyword callers continue to work.)

	Returns:
		A list of unique ticker strings in ascending order, so repeated runs
		process tickers in the same sequence. The list is empty when the
		directory is missing or contains no matching file.
	"""
	# walk() yields nothing for a missing directory, hence the default triple;
	# element [2] of the first triple is the list of plain files in `dir`.
	filenames = next(walk(dir), (None, None, []))[2]
	ticker_file = re.compile(r'([A-Z]+)_.+\.csv')
	tickers = set()
	for filename in filenames:
		# fullmatch anchors both ends, so the '.csv' must be the real extension.
		found = ticker_file.fullmatch(filename)
		if found:
			tickers.add(found.group(1))
	return sorted(tickers)

def combine_financials(data_dir, stock, financials):
	"""Load several statement CSVs for one ticker and merge them into one frame.

	Each file ``<data_dir>/<stock>_<financial>.csv`` must contain a ``name``
	column; its remaining columns become the rows of the result.

	Args:
		data_dir: Directory containing the CSV files.
		stock: Ticker used as the file-name prefix.
		financials: Iterable of file-name suffixes, one per CSV to load.

	Returns:
		A DataFrame with a ``Quarter`` column (the original CSV column labels)
		followed by one column per retained ``name`` value. Columns whose
		values all parse as numbers are numeric; the others are left as read.

	Raises:
		FileNotFoundError: if one of the expected CSV files does not exist.
		ValueError: if `financials` is empty (nothing to concatenate).
	"""
	frames = [
		pd.read_csv('{}/{}_{}.csv'.format(data_dir, stock, financial))
		for financial in financials
	]
	df = pd.concat(frames)

	# Drop rows whose name starts with a tab character. Rows with a missing
	# name are dropped as well (na=True), matching the earlier `== False` mask.
	tab_prefixed = df['name'].str.startswith('\t', na=True).astype(bool)
	df = df[~tab_prefixed]

	# Strip commas from every string cell so values such as '1,234' can be parsed.
	df = df.replace({',': ''}, regex=True)
	df = df.set_index('name').T

	for col in df.columns:
		# `errors='ignore'` is deprecated in pandas; catching the parse failure
		# explicitly keeps the same "convert if possible, else leave as-is" rule.
		try:
			df[col] = pd.to_numeric(df[col])
		except (ValueError, TypeError):
			pass

	# After the transpose the old column labels form the index; expose them
	# as a regular 'Quarter' column.
	df = df.reset_index().rename(columns={'index': 'Quarter'})
	return df

def calculate_YoY(df, measures=None):
	"""Add a ``<measure>YoY`` percentage-change column for each measure found in `df`.

	For row position ``i`` the value is ``(df[m][i] / df[m][i + 4] - 1) * 100``,
	i.e. the row is compared with the row four positions below it. Row 0 is
	never given a value, and neither is any row without a row four positions
	below it. A cell is also left empty when either operand is missing or the
	divisor is zero.
	NOTE(review): this assumes one row per quarter, newest first, with row 0
	being the 'ttm' row — confirm against the input files.

	Args:
		df: Frame to extend. It is modified in place and also returned.
		measures: Optional iterable of column names to process. Defaults to
			the built-in list below. Names absent from `df` are skipped.

	Returns:
		The same `df` object with the new object-dtype ``...YoY`` columns
		(``None`` where no value could be computed).
	"""
	if measures is None:
		measures = [
			'InterestIncome',
			'NormalizedIncome',
			'DilutedNIAvailtoComStockholders',
			'NetIncomeCommonStockholders',
			'PretaxIncome',
			'TotalOperatingIncomeAsReported',
			'OperatingIncome',
			'EBIT',
			'FreeCashFlow',
			'NormalizedEBITDA',
			'EBITDA',
			'OperatingCashFlow',
			'OperatingExpense',
			'ReconciledCostOfRevenue',
			'CostOfRevenue',
			'GrossProfit',
			'EndCashPosition',
			'TotalExpenses',
			'TotalRevenue',
			'MarketCap',
			'EnterpriseValue'
		]

	lag = 4  # row offset used for the comparison
	row_count = len(df)

	for m in measures:
		if m not in df.columns:
			continue
		# Work on positions rather than index labels so the result does not
		# depend on `df` carrying a default 0..n-1 index.
		values = df[m].to_numpy()
		yoy = [None] * row_count
		# The stop value is exclusive: the last i visited is row_count - lag - 1,
		# whose partner row i + lag is the final row of the frame.
		for i in range(1, row_count - lag):
			current, year_ago = values[i], values[i + lag]
			if pd.isna(current) or pd.isna(year_ago) or year_ago == 0:
				continue
			yoy[i] = ((current / year_ago) - 1) * 100
		# dtype=object keeps the untouched cells as None instead of NaN.
		df[m + 'YoY'] = pd.Series(yoy, index=df.index, dtype=object)
	return df


In [None]:
import pandas as pd

# Label written in place of 'ttm' in the all-tickers export.
# NOTE(review): this is a fixed literal, not the current date — update it when
# the input files are refreshed.
today = '06/20/2022'
quarterly_financials = [quarterly_valuation_measures, quarterly_income_statement, quarterly_balance_sheet, quarterly_cash_flow]

# Collect the per-ticker frames and concatenate once after the loop; growing
# all_df with pd.concat on every iteration re-copies all earlier rows each time.
per_stock_frames = []
for stock in get_stock_tickers(data_dir):
	print(stock)
	df = combine_financials(data_dir, stock, quarterly_financials)
	df = calculate_YoY(df)
	df['Ticker'] = stock
	per_stock_frames.append(df)

	# The per-ticker file is written before the 'ttm' relabel below, so it
	# keeps the original 'ttm' label.
	df.to_csv('{}/{}_quarterly_combined.csv'.format(data_dir, stock), index=False)

all_df = pd.concat(per_stock_frames, ignore_index=True) if per_stock_frames else pd.DataFrame()
all_df.loc[all_df["Quarter"] == "ttm", "Quarter"] = today
all_df.to_csv('{}/all_quarterly_combined.csv'.format(data_dir), index=False)


In [None]:
# Single-ticker run: rebuild the combined frame, including the YoY columns,
# for one hard-coded stock.
stock = 'SQ'
df = calculate_YoY(combine_financials(data_dir, stock, quarterly_financials))


In [None]:
# Measures to include in the test export; uncomment an entry to add it.
measures = [
	# 'InterestIncome',
	# 'NormalizedIncome',
	# 'DilutedNIAvailtoComStockholders',
	# 'NetIncomeCommonStockholders',
	# 'PretaxIncome',
	# 'TotalOperatingIncomeAsReported',
	# 'OperatingIncome',
	# 'EBIT',
	# 'FreeCashFlow',
	# 'NormalizedEBITDA',
	# 'EBITDA',
	# 'OperatingCashFlow',
	# 'OperatingExpense',
	# 'ReconciledCostOfRevenue',
	# 'CostOfRevenue',
	# 'GrossProfit',
	# 'EndCashPosition',
	# 'TotalExpenses',
	'TotalRevenue',
	# 'MarketCap',
	# 'EnterpriseValue'
]

# Every selected measure is exported next to its '<measure>YoY' column, after
# the two identifying columns.
yoy_columns = [name + 'YoY' for name in measures]
export_columns = ['Quarter', 'Ticker', *measures, *yoy_columns]
df2 = all_df[export_columns]
df2.to_csv('{}/all_test.csv'.format(data_dir), index=False)
