# Financial Statements

### Intro Analysis

In [5]:
import duckdb

# Open the on-disk DuckDB database file (it is created if it does not exist yet).
con = duckdb.connect("big.duckdb")

# Financial-statement CSV extracts; presumably split by the first letter of the
# ticker symbol (A-D, E-J, K-P, Q-Z) -- confirm against the export job.
a_d = '../Statements/Statement Data/financials_A-D.csv'
e_j = '../Statements/Statement Data/financials_E-J.csv'
k_p = '../Statements/Statement Data/financials_K-P.csv'
q_z = '../Statements/Statement Data/financials_Q-Z.csv'

# Number of distinct symbols with at least one statement row per calendar year.
#
# The files are stacked with UNION ALL instead of UNION: plain UNION first
# de-duplicates entire rows across every column, which is wasted work here
# because duplicate rows cannot change count(distinct symbol) per year.
df = con.sql(f"""
with combined_tables as (
    select * from read_csv_auto('{a_d}')
    union all
    select * from read_csv_auto('{e_j}')
    union all
    select * from read_csv_auto('{k_p}')
    union all
    select * from read_csv_auto('{q_z}')
)
select
    year(exactDate) as year
    ,count(distinct symbol)
from combined_tables
group by 1
order by 1
""").df()

df

Unnamed: 0,year,count(DISTINCT symbol)
0,1951,2
1,1954,1
2,1956,2
3,1958,1
4,1959,1
5,1963,1
6,1966,2
7,1967,1
8,1968,1
9,1969,1


In [4]:
# Same per-year symbol count as the previous cell, but every row is tagged with
# the file it came from (`source`) so the combined table can be traced back.
#
# The query reads `exactDate`, so it is pointed at the financial-statement files
# (a_d / e_j / k_p / q_z, assigned together with `con` in the first Intro
# Analysis cell). This keeps the cell runnable top-to-bottom on a fresh kernel.
# NOTE(review): assumes the financial-statement files are the intended input
# here -- confirm.
#
# UNION ALL is used because the aggregate below is a count(distinct ...), which
# a row-level de-duplication pass cannot change.
df = con.sql(f"""
with combined_tables as (
    select
        *
        ,'a-d' as source
    from read_csv_auto('{a_d}')
    union all
    select
        *
        ,'e-j' as source
    from read_csv_auto('{e_j}')
    union all
    select
        *
        ,'k-p' as source
    from read_csv_auto('{k_p}')
    union all
    select
        *
        ,'q-z' as source
    from read_csv_auto('{q_z}')
)
select
    year(exactDate) as year
    ,count(distinct symbol)
from combined_tables
group by 1
order by 1
""").df()

df

Unnamed: 0,year,count(DISTINCT symbol)
0,1951,2
1,1954,1
2,1956,2
3,1958,1
4,1959,1
5,1963,1
6,1966,2
7,1967,1
8,1968,1
9,1969,1


# Exploring the daily price data that we pulled earlier

In [7]:
# Location of the daily price CSV, relative to this notebook.
daily_prices_path = '../Prices/Price Data/daily_price_data.csv'

# Read the file with every column typed as text (all_varchar) and keep only the
# rows whose symbol is 'KO'. Relies on `con` from an earlier cell.
ko_prices_query = f"""
select
    *
from read_csv(
    '{daily_prices_path}',
    all_varchar=true
    )
where symbol = 'KO'
"""
ko_prices_relation = con.sql(ko_prices_query)
df_prices = ko_prices_relation.df()

df_prices

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,date,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,label,changeOverTime,symbol
0,2025-08-19,69.3,70.22,69.24,70.13,70.13,13862214,13862214,0.83,1.2,69.7225,"August 19, 25",0.012,KO
1,2025-08-18,69.98,70.05,69.05,69.13,69.13,14094900,14094900,-0.85,-1.21,69.5525,"August 18, 25",-0.0121,KO
2,2025-08-15,69.74,70.39,69.57,69.92,69.92,12381112,12381112,0.18,0.2581,69.905,"August 15, 25",0.002581,KO
3,2025-08-14,70.38,70.46,69.39,69.55,69.55,10722400,10722400,-0.83,-1.18,69.945,"August 14, 25",-0.0118,KO
4,2025-08-13,70.69,70.77,70.23,70.46,70.46,10356518,10356518,-0.23,-0.32536,70.5375,"August 13, 25",-0.0032536,KO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2507,2015-08-28,39.11,39.47,39.05,39.45,28.76,14503900,14503900,0.34,0.86934,39.27,"August 28, 15",0.0086934,KO
2508,2015-08-27,39.06,39.38,38.6,39.27,28.63,18871600,18871600,0.21,0.53763,39.0775,"August 27, 15",0.0053763,KO
2509,2015-08-26,38.75,38.83,37.89,38.73,28.24,23523300,23523300,-0.02,-0.0516129,38.55,"August 26, 15",-0.000516129,KO
2510,2015-08-25,39.13,39.28,37.93,37.99,27.7,31363717,31363717,-1.14,-2.91,38.5825,"August 25, 15",-0.0291,KO


## Inspecting the companies that failed the 10-30yr price pull

In [4]:
import duckdb

# Connect to the big.duckdb database file.
con = duckdb.connect("big.duckdb")

# Inputs: the list of symbols that have no 30yr price data, and the company
# profiles file that carries each symbol's ipoDate.
# NOTE(review): `daily_prices_path` and `df_prices` are also assigned by the
# daily-price cell above with different contents; running this cell overwrites
# them. Consider dedicated names once downstream usage has been checked.
daily_prices_path = '../Prices/symbols_with_no_price_30yr.csv'
profiles_path = '../Ticker Symbols/Symbol Lists/company_profiles.csv'

# Left-join the symbol list to the profiles on symbol, then keep the rows whose
# ipoDate sorts before '2015-08-25'. Both files are read with all_varchar, so
# that cutoff is a text comparison on the ipoDate string.
no_price_query = f"""
                    
with base as (
select
    a.*
    ,b.ipoDate
from read_csv(
    '{daily_prices_path}',
    all_varchar=true
    ) as a
left join read_csv(
    '{profiles_path}',
    all_varchar=true
    ) as b
on a.symbol = b.symbol
)
select
    *
from base
where ipoDate < '2015-08-25'
"""
no_price_relation = con.sql(no_price_query)
df_prices = no_price_relation.df()

df_prices

Unnamed: 0,symbol,ipoDate
0,AAM,2010-02-22
1,ACCS,2008-01-23
2,AIKI,1980-03-18
3,AIOT,1999-06-30
4,ALBO,2007-05-11
...,...,...
190,WTF,2006-06-01
191,WXM,2009-12-29
192,XIFR,2014-06-27
193,XSPA,2010-06-18


## Analyzing 30yr price size

In [7]:
import duckdb

# Open the on-disk DuckDB database file (it is created if it does not exist yet).
con = duckdb.connect("big.duckdb")

# 30-year price CSV extracts; presumably split by the first letter of the
# ticker symbol (A-F, G-L, M-R, S-Z) -- confirm against the export job.
a_f = '../Prices/Price Data/30yr_price_data_A-F.csv'
g_l = '../Prices/Price Data/30yr_price_data_G-L.csv'
m_r = '../Prices/Price Data/30yr_price_data_M-R.csv'
s_z = '../Prices/Price Data/30yr_price_data_S-Z.csv'

# Number of distinct symbols with at least one price row per calendar year.
#
# The files are stacked with UNION ALL instead of UNION: plain UNION first
# de-duplicates entire rows across every column of the combined price history,
# and that pass cannot change count(distinct symbol) per year.
df = con.sql(f"""
with combined_tables as (
    select * from read_csv_auto('{a_f}')
    union all
    select * from read_csv_auto('{g_l}')
    union all
    select * from read_csv_auto('{m_r}')
    union all
    select * from read_csv_auto('{s_z}')
)
select
    year(date) as year
    ,count(distinct symbol)
from combined_tables
group by 1
order by 1
""").df()

df

Unnamed: 0,year,count(DISTINCT symbol)
0,1985,676
1,1986,730
2,1987,780
3,1988,806
4,1989,837
5,1990,864
6,1991,921
7,1992,1010
8,1993,1121
9,1994,1245
