In [1]:
import pandas as pd
import numpy as np
import wrds

In [2]:
from datetime import datetime
from scipy.stats import skew, kurtosis
import time

In [4]:
# Establish connection to WRDS.
# `db` is the handle used by every db.raw_sql(...) call in the cells below.
# The saved output of this cell shows an interactive username/password prompt.
db = wrds.Connection()

Enter your WRDS username [ec2-user]: ly229
Enter your password: ········


WRDS recommends setting up a .pgpass file.


Create .pgpass file now [y/n]?:  y


Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


In [19]:
# Per-fund ownership of CUSIP 00036020 on 2022-03-31 from tfn.s12, US rows only.
# ownership = shares / 1,000,000 / shrout1; only rows with shrout1 > 0 are kept.
# NOTE(review): presumably shrout1 is in millions of shares, which would make
# 'ownership' a fraction of shares outstanding — confirm against the TFN docs.
query_check_1 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 / shrout1 AS ownership
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2022-03-31' AND '2022-03-31'
    AND cusip = '00036020'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    check_result_1 = db.raw_sql(query_check_1)
    print(check_result_1)
except Exception as e:
    # The error is printed, not raised: on failure check_result_1 is not assigned here.
    print("Error executing test query:", e)


                     fundname        date     ownership
0    FLORIDA STATE BOARD ADMI  2022-03-31  6.271509e-04
1    MORGAN STANLEY INTL FUND  2022-03-31  3.773585e-08
2    VANGUARD US ALL-CAP EQ I  2022-03-31  1.094340e-06
3    DAIWA US SMALL CAP EQUIT  2022-03-31  1.854679e-03
4    BLACKROCK SMALL CAP INDE  2022-03-31  9.313208e-05
..                        ...         ...           ...
271   INVESCO SP SMALLCAP 600  2022-03-31  4.579245e-05
272  ETC 6 MERIDIAN QUALITY G  2022-03-31  8.226415e-06
273  JNL SMALL CAP INDEX FUND  2022-03-31  2.130679e-03
274  GENEVA SMID CAP GROWTH F  2022-03-31  1.664151e-05
275   NATIONWIDE RUSSELL 2000  2022-03-31  4.849057e-06

[276 rows x 3 columns]


In [9]:
# Keep only the two columns used for the Index/Active split; .copy() gives an
# independent frame so later column additions do not touch check_result_1.
df_1 = check_result_1[['fundname', 'ownership']].copy()

In [51]:
# Label every fund 'Index' or 'Active' from substrings of its name.
# A fund is 'Index' when ANY of the six conditions in the tuple holds.
# NOTE(review): the 'Russell' checks are case-sensitive, while every fund name
# shown in this notebook's outputs is upper-case, so they presumably never
# match — confirm before relying on them.
index_keywords = ['INDEX', 'ETF', 'S&P', 'NASDAQ', 'MSCI', 'FTSE', 'TSX', 'PASSIVE', '500', '2000']
df_1['group'] = df_1['fundname'].apply(
    lambda fund: 'Index' if any((
        # keyword hit, unless the name itself starts with 'NASDAQ'
        any(kw in str(fund) for kw in index_keywords) and not str(fund).startswith('NASDAQ'),
        'Russell' in str(fund) and not str(fund).startswith('Russell'),
        'Russell 2000' in str(fund),
        'DOW ' in str(fund),  # 'DOW' followed by a space
        'DOW30' in str(fund),
        str(fund).endswith('IND'),
    )) else 'Active'
)

# Total ownership per label (grouped by 'group' only; the frame holds one CUSIP).
df_grouped_22q1 = (
    df_1
    .groupby('group')['ownership']
    .sum()
    .reset_index()
)

In [52]:
# Show the Index vs Active ownership totals computed from df_1.
df_grouped_22q1

Unnamed: 0,group,ownership
0,Active,0.27248
1,Index,0.170619


In [54]:
# Same per-fund ownership query as query_check_1, for CUSIP 03783310 on 2022-03-31.
# The saved output of this cell is an empty DataFrame (no matching rows).
query_check_2 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 / shrout1 AS ownership
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2022-03-31' AND '2022-03-31'
    AND cusip = '03783310'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    check_result_2 = db.raw_sql(query_check_2)
    print(check_result_2)
except Exception as e:
    # The error is printed, not raised: on failure check_result_2 is not assigned here.
    print("Error executing test query:", e)

Empty DataFrame
Columns: [fundname, date, ownership]
Index: []


In [43]:
# Independent two-column copy of the (empty, per the saved output) query result.
df_2 = check_result_2[['fundname', 'ownership']].copy()

In [44]:
# Label every fund 'Index' or 'Active' from substrings of its name.
# A fund is 'Index' when ANY of the six conditions in the tuple holds.
# NOTE(review): the 'Russell' checks are case-sensitive, while every fund name
# shown in this notebook's outputs is upper-case, so they presumably never
# match — confirm before relying on them.
index_keywords = ['INDEX', 'ETF', 'S&P', 'NASDAQ', 'MSCI', 'FTSE', 'TSX', 'PASSIVE', '500', '2000']
df_2['group'] = df_2['fundname'].apply(
    lambda fund: 'Index' if any((
        # keyword hit, unless the name itself starts with 'NASDAQ'
        any(kw in str(fund) for kw in index_keywords) and not str(fund).startswith('NASDAQ'),
        'Russell' in str(fund) and not str(fund).startswith('Russell'),
        'Russell 2000' in str(fund),
        'DOW ' in str(fund),  # 'DOW' followed by a space
        'DOW30' in str(fund),
        str(fund).endswith('IND'),
    )) else 'Active'
)

# Total ownership per label (grouped by 'group' only; the frame holds one CUSIP).
df_grouped_2 = (
    df_2
    .groupby('group')['ownership']
    .sum()
    .reset_index()
)

In [45]:
# Show the Index vs Active totals for df_2 (no rows in the saved output).
df_grouped_2

Unnamed: 0,group,ownership


## Find Problem and Fix 

### 1) Testing the filter written in Python and in SQL

#### - shown to give the same totals for the CUSIP tested (00036110)

In [57]:
# Per-fund ownership of CUSIP 00036110 on 2022-03-31 from tfn.s12, US rows only.
# ownership = shares / 1,000,000 / shrout1; only rows with shrout1 > 0 are kept.
query_check_3 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 / shrout1 AS ownership
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2022-03-31' AND '2022-03-31'
    AND cusip = '00036110'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    check_result_3 = db.raw_sql(query_check_3)
    print(check_result_3)
except Exception as e:
    # The error is printed, not raised: on failure check_result_3 is not assigned here.
    print("Error executing test query:", e)

                     fundname        date  ownership
0    FLORIDA STATE BOARD ADMI  2022-03-31   0.004390
1    VANGUARD US ALL-CAP EQ I  2022-03-31   0.000001
2    CORNERSTONE ADVISORS GLO  2022-03-31   0.000200
3    ISHARES FACTORS US SMALL  2022-03-31   0.000004
4    PGIM QMA STRATEGIC ALPHA  2022-03-31   0.000015
..                        ...         ...        ...
302  NATIONWIDE NVIT GS SMALL  2022-03-31   0.000219
303   STRATEGAS GLOBAL POLICY  2022-03-31   0.000012
304  AVANTIS US SMALL CAP EQU  2022-03-31   0.000003
305  DFA DIMENSIONAL US SMALL  2022-03-31   0.000054
306  SPDR SP SMALLCAP 600 ESG  2022-03-31   0.000004

[307 rows x 3 columns]


In [58]:
# Independent two-column working copy of the 00036110 query result.
df_3 = check_result_3.loc[:, ['fundname', 'ownership']].copy()

# Label every fund 'Index' or 'Active' from substrings of its name.
# A fund is 'Index' when ANY of the six conditions in the tuple holds.
# NOTE(review): the 'Russell' checks are case-sensitive, while every fund name
# shown in this notebook's outputs is upper-case, so they presumably never
# match — confirm before relying on them.
index_keywords = ['INDEX', 'ETF', 'S&P', 'NASDAQ', 'MSCI', 'FTSE', 'TSX', 'PASSIVE', '500', '2000']
df_3['group'] = df_3['fundname'].apply(
    lambda fund: 'Index' if any((
        # keyword hit, unless the name itself starts with 'NASDAQ'
        any(kw in str(fund) for kw in index_keywords) and not str(fund).startswith('NASDAQ'),
        'Russell' in str(fund) and not str(fund).startswith('Russell'),
        'Russell 2000' in str(fund),
        'DOW ' in str(fund),  # 'DOW' followed by a space
        'DOW30' in str(fund),
        str(fund).endswith('IND'),
    )) else 'Active'
)

# Total ownership per label (grouped by 'group' only; the frame holds one CUSIP).
df_grouped_3 = (
    df_3
    .groupby('group')['ownership']
    .sum()
    .reset_index()
)

In [59]:
# Show the Python-side Index vs Active totals for CUSIP 00036110.
df_grouped_3

Unnamed: 0,group,ownership
0,Active,0.331415
1,Index,0.255729


In [60]:
# Time the SQL version of the Index/Active split for CUSIP 00036110 on 2022-03-31.
start_time_ins = time.time()

# The CASE expression mirrors the Python keyword filter applied to df_3:
#   (any keyword matches AND the name does not start with 'NASDAQ')
#   OR 'Russell' appears inside, but not at the start of, the name
#   OR 'Russell 2000' / 'DOW ' / 'DOW30' appears in the name
#   OR the name ends with 'IND'.
# Fix: the previous version OR-ed in "fundname LIKE 'NASDAQ%%'", which labelled
# NASDAQ-prefixed names as Index, whereas the Python filter excludes them from
# the keyword match; the condition is now "AND fundname NOT LIKE 'NASDAQ%%'".
# NOTE(review): LIKE is case-sensitive in PostgreSQL, so the mixed-case
# 'Russell' patterns presumably never match the upper-case names — left as in
# the Python filter so both sides stay comparable.
# '%%' is kept exactly as before (presumably an escaped '%' for the driver).
test_query_00036110 = """
    WITH Ownership AS (
        SELECT
            cusip,
            fdate AS date,
            shares / 1000000 / shrout1 AS ownership,
            CASE
                WHEN ((fundname LIKE '%%INDEX%%' OR fundname LIKE '%%ETF%%' OR
                       fundname LIKE '%%S&P%%' OR fundname LIKE '%%NASDAQ%%' OR fundname LIKE '%%MSCI%%' OR
                       fundname LIKE '%%FTSE%%' OR fundname LIKE '%%TSX%%' OR fundname LIKE '%%PASSIVE%%' OR
                       fundname LIKE '%%500%%' OR fundname LIKE '%%2000%%')
                      AND fundname NOT LIKE 'NASDAQ%%') OR
                     (fundname LIKE '%%Russell%%' AND fundname NOT LIKE 'Russell%%') OR
                     fundname LIKE '%%Russell 2000%%' OR
                     fundname LIKE '%%DOW %%' OR fundname LIKE '%%DOW30%%' OR
                     fundname LIKE '%%IND'
                THEN 'Index Fund'
                ELSE 'Active Fund'
            END AS fund_type

        FROM
            tfn.s12

        WHERE
            fdate BETWEEN '2022-03-31' AND '2022-03-31'
            AND country = 'UNITED STATES'
            AND cusip = '00036110'
            AND shrout1 > 0
    )
    SELECT
        cusip,
        date,
        SUM(CASE WHEN fund_type = 'Index Fund' THEN ownership ELSE 0 END) AS index_own,
        SUM(CASE WHEN fund_type = 'Active Fund' THEN ownership ELSE 0 END) AS active_own
    FROM
        Ownership
    GROUP BY
        cusip, date
    ORDER BY
        cusip, date

"""

try:
    # The name is rebound from the SQL text to the resulting DataFrame.
    test_query_00036110 = db.raw_sql(test_query_00036110)
    print(test_query_00036110)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

end_time_ins = time.time()
execution_time_ins = end_time_ins - start_time_ins

print(f"Execution time for test ownership query: {execution_time_ins:.4f} seconds")

      cusip        date  index_own  active_own
0  00036110  2022-03-31   0.255729    0.331415
Execution time for test ownership query: 0.0223 seconds


### 2) Check extreme outliers in active ownership

In [46]:
# Load the combined ownership file (path is relative to the working directory).
# The saved outputs below show columns cusip, date, index_own and active_own.
df_own= pd.read_csv('tnf_own_combined.csv')

In [47]:
# Number of distinct CUSIPs in the combined ownership file.
unique_cusip_count_all = df_own['cusip'].nunique()

# Print the count
print(unique_cusip_count_all)

63178


In [74]:
# Largest index-fund ownership value in the file. It is printed explicitly
# because only the last expression of a cell is displayed automatically, so the
# bare expression used before produced no visible result.
print(df_own.index_own.max())

# Rows whose active ownership is 100 or more (shown as the cell's result).
df_own[df_own.active_own >= 100]

Unnamed: 0,cusip,date,index_own,active_own
355460,23319E20,2010-12-31,0.0,816.371519
380196,05518F20,2011-12-31,0.0,115.795
550899,48124BAC,2017-12-31,0.0,144.731789
555587,693475AK,2017-12-31,0.0,333.07571
598277,48124BAC,2018-03-31,0.0,149.111729
598278,48124BAC,2018-06-30,0.0,149.603508
598279,48124BAC,2018-09-30,0.0,138.288958
598280,48124BAC,2018-12-31,0.0,137.241774
603807,693475AK,2018-03-31,0.0,362.940811
603808,693475AK,2018-06-30,0.0,348.226012


In [13]:
# Per-fund holdings of CUSIP 693475AK between 2020-12-01 and 2022-12-31 (US rows,
# shrout1 > 0), returning shares / 1,000,000 as 'shares' next to the raw shrout1.
# The name query_check_693475AK first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_693475AK = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2020-12-01' AND '2022-12-31'
    AND cusip = '693475AK'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    query_check_693475AK = db.raw_sql(query_check_693475AK)
    print(query_check_693475AK)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                     fundname        date  shares  shrout1
0     AT INCOME OPPORTUNITIES  2020-12-31   2.500      1.0
1    COHEN STEERS TAXADV PREF  2020-12-31   4.000      1.0
2    FLAHERTY&CRUMRINE DYN PR  2020-12-31   5.341      1.0
3     JPMORGAN INCOME BUILDER  2020-12-31  10.102      1.0
4      JOHN HANCOCK BOND FUND  2020-12-31  29.683      1.0
..                        ...         ...     ...      ...
482  ALPSSMITH CREDIT OPPORTU  2022-12-31   5.302      1.0
483  COHEN STEERS TAXADV PREF  2022-12-31  37.399      1.0
484  COHEN STEERS REAL ESTATE  2022-12-31   2.000      1.0
485   TYPHON TACTICAL MANAGED  2022-12-31   0.755      1.0
486   INVESCO ESG GLOBAL BOND  2022-12-31   0.100      1.0

[487 rows x 4 columns]


In [14]:
# Alias (not a copy) of the query result: both names refer to the same object.
df_693 = query_check_693475AK

In [15]:
# Rows where one fund's scaled holding (shares / 1,000,000) is at least shrout1.
df_693[df_693.shares >= df_693.shrout1]

Unnamed: 0,fundname,date,shares,shrout1
0,AT INCOME OPPORTUNITIES,2020-12-31,2.500,1.0
1,COHEN STEERS TAXADV PREF,2020-12-31,4.000,1.0
2,FLAHERTY&CRUMRINE DYN PR,2020-12-31,5.341,1.0
3,JPMORGAN INCOME BUILDER,2020-12-31,10.102,1.0
4,JOHN HANCOCK BOND FUND,2020-12-31,29.683,1.0
...,...,...,...,...
476,JOHN HANCOCK VIT SELECT,2022-12-31,11.183,1.0
479,COHEN STEERS LIMITED DUR,2022-12-31,4.720,1.0
482,ALPSSMITH CREDIT OPPORTU,2022-12-31,5.302,1.0
483,COHEN STEERS TAXADV PREF,2022-12-31,37.399,1.0


In [16]:
# Convert 'date' to datetime in place (df_693 aliases the query result).
df_693['date'] = pd.to_datetime(df_693['date'])
# Group by calendar quarter and sum the scaled shares across all funds
df_693.groupby(df_693['date'].dt.to_period('Q'))['shares'].sum()

date
2020Q4    239.182709
2021Q1    262.614000
2021Q2    261.182000
2021Q3    291.820000
2021Q4    279.224000
2022Q1    277.092000
2022Q2    277.757000
2022Q3    262.013000
2022Q4    257.631000
Freq: Q-DEC, Name: shares, dtype: float64

In [10]:
# Per-fund holdings of CUSIP 693475AK in December 2017 (US rows, shrout1 > 0),
# returning shares / 1,000,000 as 'shares' next to the raw shrout1.
# The name query_check_693475AK first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_693475AK = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2017-12-01' AND '2017-12-31'
    AND cusip = '693475AK'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    query_check_693475AK = db.raw_sql(query_check_693475AK)
    print(query_check_693475AK)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                    fundname        date     shares  shrout1
0   JOHN HANCOCK FDS II ACTI  2017-12-31   1.240000      1.0
1   FLAHERTY&CRUMRINE DYN PR  2017-12-31   9.928000      1.0
2   FLAHERTY&CRUMRINE/CLAYMO  2017-12-31   3.043000      1.0
3   FLAHERTY&CRUMRINE CLAYMO  2017-12-31   2.850000      1.0
4   FLAHERTY & CRUMRINE PREF  2017-12-31   0.625000      1.0
5   PREFERRED INC OPPORTUNIT  2017-12-31   0.395000      1.0
6     JOHN HANCOCK BOND FUND  2017-12-31  10.258000      1.0
7         COMMERCE BOND FUND  2017-12-31   2.500000      1.0
8   JOHN HANCOCK INVT GRD BD  2017-12-31   0.801000      1.0
9   JOHN HANCOCK INC SECURIT  2017-12-31   0.520000      1.0
10  VIRTUS MULTI SECTOR FI F  2017-12-31   1.000000      1.0
11  NUVEEN MULTI STRAT INC &  2017-12-31  26.603000      1.0
12  JOHN HANCOCK BALANCED FU  2017-12-31   0.555000      1.0
13    INCOME FUND OF AMERICA  2017-12-31  10.250000      1.0
14  COHEN&STEERS REIT & PREF  2017-12-31   2.775000      1.0
15  NUVEEN TAX ADVANTAGE

### 3) Test for potentially wrong shrout1 entries

#### - i) for shrout1 = 1.0  ---- decision: drop 

In [18]:
# All US holdings rows between 2020-12-01 and 2022-12-31 whose shrout1 is exactly 1.0,
# returning shares / 1,000,000 as 'shares' together with cusip and fundname.
# The name query_check_shrt1 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_shrt1 = """
    SELECT 
    fundname, 
    cusip,
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2020-12-01' AND '2022-12-31'
    AND country = 'UNITED STATES'
    AND shrout1 = 1.0
"""
try:
    query_check_shrt1 = db.raw_sql(query_check_shrt1)
    print(query_check_shrt1)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                       fundname     cusip        date    shares  shrout1
0       PINNACLE DYNAMIC GROWTH  33737J12  2020-12-31  0.015340      1.0
1       PINNACLE DYNAMIC GROWTH  33737J13  2020-12-31  0.020765      1.0
2       PINNACLE DYNAMIC GROWTH  33738R81  2020-12-31  0.008875      1.0
3      ETFIS VIRTUS GLOVISTA EM  35473P76  2020-12-31  0.005525      1.0
4      GERSTEIN FISHER MULTI-FA  J0018410  2020-12-31  0.000005      1.0
...                         ...       ...         ...       ...      ...
37180  FLEXSHARES MORNINGSTAR E  Y8363Z10  2022-12-31  0.000094      1.0
37181  VANGUARD ESG DEVELOPED W  Y8363Z10  2022-12-31  0.000002      1.0
37182   VANGUARD ESG GLOBAL ALL  Y8363Z10  2022-12-31  0.000001      1.0
37183   STATE STREET GLOBAL ALL  Y8363Z10  2022-12-31  0.000281      1.0
37184   VANGUARD ESG DVLPD ASIA  Y8363Z10  2022-12-31  0.000002      1.0

[37185 rows x 5 columns]


In [20]:
# Alias (not a copy) of the shrout1 = 1.0 query result.
df_shrt1 = query_check_shrt1

In [21]:
# Re-wrap in a DataFrame (the printed query output above already looks like one).
df_shrt1 = pd.DataFrame(df_shrt1)

# Convert the 'date' column to datetime
df_shrt1['date'] = pd.to_datetime(df_shrt1['date'])

# Add a 'Quarter' column holding the calendar-quarter period of each date
df_shrt1['Quarter'] = df_shrt1['date'].dt.to_period('Q')

# Group by 'cusip' and 'Quarter', then sum the scaled 'shares' across funds
grouped_df_shrt1 = df_shrt1.groupby(['cusip', 'Quarter'])['shares'].sum().reset_index()

In [26]:
# Number of distinct CUSIPs that have at least one row with shrout1 = 1.0.
unique_cusips_count = grouped_df_shrt1['cusip'].nunique()

# Print the count
print(unique_cusips_count)

562


In [33]:
# Per-fund holdings of CUSIP 00162Q51 between 2016-12-01 and 2024-12-31 (US rows,
# shrout1 > 0), returning shares / 1,000,000 as 'shares' next to the raw shrout1.
# The name query_check_00162Q51 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_00162Q51 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2016-12-01' AND '2024-12-31'
    AND cusip = '00162Q51'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    query_check_00162Q51 = db.raw_sql(query_check_00162Q51)
    print(query_check_00162Q51)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                    fundname        date    shares  shrout1
0   RIVERFRONT DYNAMIC EQ IN  2018-03-31  0.337038      3.0
1    RIVERFRONT GLOBAL ALLOC  2018-03-31  0.225742      3.0
2   RIVERFRONT MODERATE GR &  2018-03-31  0.483468      3.0
3   RIVERFRONT ASSET ALLOCAT  2018-03-31  0.384456      3.0
4   RIVERFRONT ASSET ALLOCAT  2018-03-31  0.025513      3.0
..                       ...         ...       ...      ...
77  RIVERFRONT DYNAMIC EQ IN  2022-12-31  0.122214      1.0
78  RIVERFRONT DYNAMIC EQ IN  2023-03-31  0.113395      1.0
79  RIVERFRONT DYNAMIC EQ IN  2023-06-30  0.108042      1.0
80  RIVERFRONT DYNAMIC EQ IN  2023-09-30  0.105779      1.0
81  RIVERFRONT DYNAMIC EQ IN  2023-12-31  0.098661      1.0

[82 rows x 4 columns]


In [37]:
# Alias (not a copy) of the 00162Q51 query result.
df_0016 = query_check_00162Q51

In [41]:
# Per-fund ownership: scaled shares (shares / 1,000,000) divided by shrout1.
df_0016['ownership'] = df_0016['shares']/df_0016['shrout1']

In [42]:
# Convert 'date' to datetime in place (df_0016 aliases the query result).
df_0016['date'] = pd.to_datetime(df_0016['date'])
# Group by calendar quarter and sum per-fund ownership across all funds
df_0016.groupby(df_0016['date'].dt.to_period('Q'))['ownership'].sum()

date
2018Q1    0.485406
2018Q2    0.373237
2018Q3    0.367264
2018Q4    0.326224
2019Q1    0.388200
2019Q2    0.371407
2019Q3    0.410552
2019Q4    0.376641
2020Q1    0.469950
2020Q2    0.444951
2020Q3    0.296759
2020Q4    0.274274
2021Q1    0.509876
2021Q2    0.495873
2021Q3    0.406557
2021Q4    0.225174
2022Q1    0.225174
2022Q2    0.368244
2022Q3    0.355858
2022Q4    0.347388
2023Q1    0.113395
2023Q2    0.108042
2023Q3    0.105779
2023Q4    0.098661
Freq: Q-DEC, Name: ownership, dtype: float64

#### - ii) for shrout1 = 2.0  ---- decision:  drop

In [48]:
# All US holdings rows between 2020-12-01 and 2022-12-31 whose shrout1 is exactly 2.0,
# returning shares / 1,000,000 as 'shares' together with cusip and fundname.
# The name query_check_shrt2 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_shrt2 = """
    SELECT 
    fundname, 
    cusip,
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2020-12-01' AND '2022-12-31'
    AND country = 'UNITED STATES'
    AND shrout1 = 2.0
"""
try:
    query_check_shrt2 = db.raw_sql(query_check_shrt2)
    print(query_check_shrt2)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                       fundname     cusip        date    shares  shrout1
0      LINCOLN DIMENSIONAL INTL  F1226412  2020-12-31  0.000010      2.0
1       FIDELITY NORTHSTAR FUND  J0503N10  2020-12-31  0.010000      2.0
2      OSHAUGHNESSY SMALLMID CA  04990410  2020-12-31  0.000023      2.0
3      WELLS FARGO FACTOR ENHAN  14047520  2020-12-31  0.000001      2.0
4      ASPIRATION FLAGSHIP FUND  37954Y47  2020-12-31  0.007316      2.0
...                         ...       ...         ...       ...      ...
37555  FLEXSHARES MORNINGSTAR E  Y9858R10  2022-12-31  0.000235      2.0
37556  VANGUARD ESG DEVELOPED W  Y9858R10  2022-12-31  0.000006      2.0
37557   VANGUARD ESG GLOBAL ALL  Y9858R10  2022-12-31  0.000004      2.0
37558   STATE STREET GLOBAL ALL  Y9858R10  2022-12-31  0.000416      2.0
37559   VANGUARD ESG DVLPD ASIA  Y9858R10  2022-12-31  0.000001      2.0

[37560 rows x 5 columns]


In [50]:
# Alias (not a copy) of the shrout1 = 2.0 query result.
df_shrt2 = query_check_shrt2

In [62]:
# Rows where one fund's scaled holding (shares / 1,000,000) is at least 0.1.
# NOTE(review): the saved output already shows a 'Quarter' column, which is only
# added in the cell after this one — the cells were presumably run out of order.
df_shrt2[df_shrt2.shares >= 0.1]

Unnamed: 0,fundname,cusip,date,shares,shrout1,Quarter
19,POWERSHARES DWA TACT SEC,46137V81,2020-12-31,0.157422,2.0,2020Q4
20,POWERSHARES DWA TACT SEC,46137V84,2020-12-31,0.183042,2.0,2020Q4
75,FIDELITY GLOBAL INTRINSI,J1258T10,2020-12-31,0.151400,2.0,2020Q4
95,FIDELITY GLOBAL INTRINSI,J9560010,2020-12-31,0.133600,2.0,2020Q4
148,PUTNAM EQUITY SPECTRUM F,02153X10,2020-12-31,0.116050,2.0,2020Q4
...,...,...,...,...,...,...
35859,VANGUARD LIFESTRATEGY 40,G9T17W49,2022-12-31,2.477443,2.0,2022Q4
36477,FIDELITY LOW PRICED STOC,J2302Q10,2022-12-31,0.127677,2.0,2022Q4
37430,FIDELITY NORTHSTAR FUND,Y5094610,2022-12-31,0.100097,2.0,2022Q4
37435,YACKTMAN FOCUSED FUND,Y5275R11,2022-12-31,0.118000,2.0,2022Q4


In [51]:
# Re-wrap in a DataFrame (the printed query output above already looks like one).
df_shrt2 = pd.DataFrame(df_shrt2)

# Convert the 'date' column to datetime
df_shrt2['date'] = pd.to_datetime(df_shrt2['date'])

# Add a 'Quarter' column holding the calendar-quarter period of each date
df_shrt2['Quarter'] = df_shrt2['date'].dt.to_period('Q')

# Group by 'cusip' and 'Quarter', then sum the scaled 'shares' across funds
grouped_df_shrt2 = df_shrt2.groupby(['cusip', 'Quarter'])['shares'].sum().reset_index()

In [61]:
# Number of distinct CUSIPs that have at least one row with shrout1 = 2.0.
unique_cusip_count_2 = df_shrt2['cusip'].nunique()

# Print the count
print(unique_cusip_count_2)

628


In [69]:
# Per-fund holdings of CUSIP J2302Q10 between 2010-12-01 and 2024-12-31 (US rows,
# shrout1 > 0), returning shares / 1,000,000 as 'shares' next to the raw shrout1.
# The name query_check_J2302Q10 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_J2302Q10 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2010-12-01' AND '2024-12-31'
    AND cusip = 'J2302Q10'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    query_check_J2302Q10 = db.raw_sql(query_check_J2302Q10)
    print(query_check_J2302Q10)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                     fundname        date    shares  shrout1
0    FIDELITY LOW PRICED STOC  2017-12-31  0.123140      2.0
1    FIDELITY SER INTRINSIC O  2017-12-31  0.018200      2.0
2     FIDELITY NORTHSTAR FUND  2017-12-31  0.004060      2.0
3    FIDELITY NORTHSTAR BAL C  2017-12-31  0.000172      2.0
4    FIDELITY NORTHSTAR BAL S  2017-12-31  0.000830      2.0
..                        ...         ...       ...      ...
141  FIDELITY LOWPRICED STOCK  2024-03-31  0.010531      2.0
142  FIDELITY LOW PRICED STOC  2024-06-30  0.074490      2.0
143  FIDELITY LOWPRICED STOCK  2024-06-30  0.008173      2.0
144  FIDELITY LOW PRICED STOC  2024-09-30  0.024362      2.0
145  FIDELITY LOWPRICED STOCK  2024-09-30  0.002146      2.0

[146 rows x 4 columns]


In [70]:
# Alias (not a copy) of the J2302Q10 query result.
df_J23 = query_check_J2302Q10

In [71]:
# Per-fund ownership: scaled shares (shares / 1,000,000) divided by shrout1.
df_J23['ownership'] = df_J23['shares']/df_J23['shrout1']
# Convert 'date' to datetime in place (df_J23 aliases the query result).
df_J23['date'] = pd.to_datetime(df_J23['date'])
# Group by calendar quarter and sum per-fund ownership across all funds
df_J23.groupby(df_J23['date'].dt.to_period('Q'))['ownership'].sum()

date
2017Q4    0.075001
2018Q1    0.074230
2018Q2    0.073316
2018Q3    0.073214
2018Q4    0.073419
2019Q1    0.073263
2019Q2    0.073218
2019Q3    0.072947
2019Q4    0.072886
2020Q1    0.072906
2020Q2    0.072872
2020Q3    0.072872
2020Q4    0.072854
2021Q1    0.072738
2021Q2    0.072732
2021Q3    0.072615
2021Q4    0.072617
2022Q1    0.072535
2022Q2    0.072538
2022Q3    0.072536
2022Q4    0.072525
2023Q1    0.072525
2023Q2    0.071702
2023Q3    0.069896
2023Q4    0.067446
2024Q1    0.052553
2024Q2    0.041332
2024Q3    0.013254
Freq: Q-DEC, Name: ownership, dtype: float64

#### - iii) for shrout1 = 3.0  ---- decision: drop

In [72]:
# All US holdings rows between 2020-12-01 and 2022-12-31 whose shrout1 is exactly 3.0,
# returning shares / 1,000,000 as 'shares' together with cusip and fundname.
# The name query_check_shrt3 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_shrt3 = """
    SELECT 
    fundname, 
    cusip,
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2020-12-01' AND '2022-12-31'
    AND country = 'UNITED STATES'
    AND shrout1 = 3.0
"""
try:
    query_check_shrt3 = db.raw_sql(query_check_shrt3)
    print(query_check_shrt3)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                       fundname     cusip        date    shares  shrout1
0      GERSTEIN FISHER MULTI-FA  04410410  2020-12-31  0.000001      3.0
1      WELLS FARGO FACTOR ENHAN  31942S10  2020-12-31  0.000002      3.0
2      PINNACLE SHERMAN TACT AL  33734G10  2020-12-31  0.029330      3.0
3      GERSTEIN FISHER MULTI-FA  J2754410  2020-12-31  0.000011      3.0
4      NEUBERGER BERMAN MULTI-A  42782540  2020-12-31  0.000764      3.0
...                         ...       ...         ...       ...      ...
39801  LYRICAL INTERNATIONAL VA  J7557310  2022-12-31  0.000083      3.0
39802  FIDELITY LOWPRICED STOCK  J7557310  2022-12-31  0.005970      3.0
39803  FIDELITY ASSET ALLOC CUR  J7557310  2022-12-31  0.000015      3.0
39804  FIDELITY GLOBAL INTRINSI  J7557310  2022-12-31  0.000881      3.0
39805  FIDELITY BALANCED INCOME  J7557310  2022-12-31  0.000013      3.0

[39806 rows x 5 columns]


In [73]:
# Alias (not a copy) of the shrout1 = 3.0 query result.
df_shrt3 = query_check_shrt3

In [74]:
# Re-wrap in a DataFrame (the printed query output above already looks like one).
df_shrt3 = pd.DataFrame(df_shrt3)

# Convert the 'date' column to datetime
df_shrt3['date'] = pd.to_datetime(df_shrt3['date'])

# Add a 'Quarter' column holding the calendar-quarter period of each date
df_shrt3['Quarter'] = df_shrt3['date'].dt.to_period('Q')

# Group by 'cusip' and 'Quarter', then sum the scaled 'shares' across funds
grouped_df_shrt3 = df_shrt3.groupby(['cusip', 'Quarter'])['shares'].sum().reset_index()

In [118]:
# Number of distinct CUSIPs that have at least one row with shrout1 = 3.0.
unique_cusip_count_3 = df_shrt3['cusip'].nunique()

# Print the count
print(unique_cusip_count_3)

662


In [None]:
# CUSIP examined in the next query: J7557310
# (kept as a comment: as a bare token in a code cell it raises NameError when run)

In [106]:
# Per-fund holdings of CUSIP J7557310 in June 2021 (US rows, shrout1 > 0),
# returning shares / 1,000,000 as 'shares' next to the raw shrout1.
# The name query_check_J7557310 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_J7557310 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2021-06-01' AND '2021-06-30'
    AND cusip = 'J7557310'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    query_check_J7557310 = db.raw_sql(query_check_J7557310)
    print(query_check_J7557310)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                    fundname        date    shares  shrout1
0   FIDELITY WORLD UNDERVALU  2021-06-30  0.000250      3.0
1   FIDELITY WORLD UNDERVALU  2021-06-30  0.000155      3.0
2   FIDELITY GLOBAL UNDERVAL  2021-06-30  0.004429      3.0
3   FIDELITY GLOBAL UNDERVAL  2021-06-30  0.004970      3.0
4   DFA JAPANESE SMALL CO SE  2021-06-30  0.001100      3.0
5    DFA INTL SOCIAL CORE EQ  2021-06-30  0.000200      3.0
6   FIRST EAGLE OVERSEAS VAR  2021-06-30  0.002038      3.0
7   MFS SER V-MFS INTL NEW D  2021-06-30  0.118100      3.0
8    FIDELITY NORTHSTAR FUND  2021-06-30  0.003000      3.0
9   FIDELITY LOW PRICED STOC  2021-06-30  0.049665      3.0
10  NORTH AMER VP II INTL GR  2021-06-30  0.004600      3.0
11  FIRST EAGLE OVERSEAS FUN  2021-06-30  0.087561      3.0
12  FIDELITY INTL SML CAP OP  2021-06-30  0.030900      3.0
13   FIDELITY GLOBAL MONTHLY  2021-06-30  0.000026      3.0
14  FIDELITY TOTAL INTL EQUI  2021-06-30  0.000120      3.0
15  FIDELITY NORTHSTAR BAL C  2021-06-30

In [109]:
# Alias (not a copy) of the J7557310 query result.
df_J75 = query_check_J7557310

In [110]:
# Per-fund ownership: scaled shares (shares / 1,000,000) divided by shrout1.
df_J75['ownership'] = df_J75['shares']/df_J75['shrout1']

In [111]:
# Label every fund 'Index' or 'Active' from substrings of its name.
# A fund is 'Index' when ANY of the six conditions in the tuple holds.
# NOTE(review): the 'Russell' checks are case-sensitive, while every fund name
# shown in this notebook's outputs is upper-case, so they presumably never
# match — confirm before relying on them.
index_keywords = ['INDEX', 'ETF', 'S&P', 'NASDAQ', 'MSCI', 'FTSE', 'TSX', 'PASSIVE', '500', '2000']
df_J75['group'] = df_J75['fundname'].apply(
    lambda fund: 'Index' if any((
        # keyword hit, unless the name itself starts with 'NASDAQ'
        any(kw in str(fund) for kw in index_keywords) and not str(fund).startswith('NASDAQ'),
        'Russell' in str(fund) and not str(fund).startswith('Russell'),
        'Russell 2000' in str(fund),
        'DOW ' in str(fund),  # 'DOW' followed by a space
        'DOW30' in str(fund),
        str(fund).endswith('IND'),
    )) else 'Active'
)

# Total ownership per label; grouping is by 'group' only, so the sum runs over
# every row (and every date) present in df_J75.
df_grouped_J75 = (
    df_J75
    .groupby('group')['ownership']
    .sum()
    .reset_index()
)

In [112]:
# Show the ownership totals per label for CUSIP J7557310.
df_grouped_J75

Unnamed: 0,group,ownership
0,Active,0.135394


In [79]:
# Per-fund ownership: scaled shares (shares / 1,000,000) divided by shrout1.
df_J75['ownership'] = df_J75['shares']/df_J75['shrout1']
# Convert 'date' to datetime in place (df_J75 aliases the query result).
df_J75['date'] = pd.to_datetime(df_J75['date'])
# Group by calendar quarter and sum per-fund ownership across all funds.
# NOTE(review): the saved output spans 2017Q4-2024Q3, but the J7557310 query
# shown above only covers June 2021 — this output is presumably from an earlier
# run with a wider date range.
df_J75.groupby(df_J75['date'].dt.to_period('Q'))['ownership'].sum()

date
2017Q4    0.087241
2018Q1    0.083422
2018Q2    0.079279
2018Q3    0.083963
2018Q4    0.084632
2019Q1    0.085662
2019Q2    0.086456
2019Q3    0.089789
2019Q4    0.097905
2020Q1    0.109772
2020Q2    0.111888
2020Q3    0.118894
2020Q4    0.126616
2021Q1    0.134258
2021Q2    0.135394
2021Q3    0.134577
2021Q4    0.134931
2022Q1    0.131965
2022Q2    0.145090
2022Q3    0.147314
2022Q4    0.146071
2023Q1    0.143581
2023Q2    0.133460
2023Q3    0.115223
2023Q4    0.114304
2024Q1    0.112637
2024Q2    0.112062
2024Q3    0.111979
Freq: Q-DEC, Name: ownership, dtype: float64

In [None]:
# CUSIP examined in the next query: 00037W10
# (kept as a comment: as a bare token in a code cell it is a SyntaxError)

In [113]:
# Per-fund holdings of CUSIP 00037W10 between 2018-03-01 and 2021-06-30 (US rows,
# shrout1 > 0), returning shares / 1,000,000 as 'shares' next to the raw shrout1.
# The name query_check_00037W10 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_00037W10 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2018-03-01' AND '2021-06-30'
    AND cusip = '00037W10'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    query_check_00037W10 = db.raw_sql(query_check_00037W10)
    print(query_check_00037W10)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                    fundname        date  shares  shrout1
0   MUTUAL FINANCIAL SERVICE  2018-03-31  0.2261      3.0
1   MUTUAL FINANCIAL SERVICE  2018-06-30  0.2261      3.0
2   MUTUAL FINANCIAL SERVICE  2018-09-30  0.2261      3.0
3   MUTUAL FINANCIAL SERVICE  2018-12-31  0.2261      3.0
4   MUTUAL FINANCIAL SERVICE  2019-03-31  0.2261      3.0
5   MUTUAL FINANCIAL SERVICE  2019-06-30  0.2261      3.0
6   MUTUAL FINANCIAL SERVICE  2019-09-30  0.2261      3.0
7   MUTUAL FINANCIAL SERVICE  2019-12-31  0.2261      3.0
8   MUTUAL FINANCIAL SERVICE  2020-03-31  0.2261      3.0
9   MUTUAL FINANCIAL SERVICE  2020-06-30  0.2261      3.0
10  MUTUAL FINANCIAL SERVICE  2020-09-30  0.2261      3.0
11  MUTUAL FINANCIAL SERVICE  2020-12-31  0.2261      3.0
12  MUTUAL FINANCIAL SERVICE  2021-03-31  0.2261      3.0
13  MUTUAL FINANCIAL SERVICE  2021-06-30  0.2261      3.0


In [None]:
# CUSIP examined in the next query: Y9854K10
# (kept as a comment: as a bare token in a code cell it raises NameError when run)

In [114]:
# Per-fund holdings of CUSIP Y9854K10 between 2018-03-01 and 2021-06-30 (US rows,
# shrout1 > 0), returning shares / 1,000,000 as 'shares' next to the raw shrout1.
# The name query_check_Y9854K10 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_Y9854K10 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2018-03-01' AND '2021-06-30'
    AND cusip = 'Y9854K10'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    query_check_Y9854K10 = db.raw_sql(query_check_Y9854K10)
    print(query_check_Y9854K10)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                     fundname        date    shares  shrout1
0    OLD WESTBURY GBL SML&MID  2018-03-31  0.000382      3.0
1    DFA EMER MRKT SML CAP SE  2018-03-31  0.012949      3.0
2    DFA EMER MRKT CORE EQUIT  2018-03-31  0.018386      3.0
3    DFA EMG MRKT SOCIAL CORE  2018-03-31  0.001038      3.0
4    DFA WLD EX US CORE EQUIT  2018-03-31  0.000637      3.0
..                        ...         ...       ...      ...
220  JOHN HANCOCK EMG MRKT VA  2021-06-30  0.000137      2.0
221  DIMENSIONAL EMER MRKT VA  2021-06-30  0.000502      2.0
222   JOHN HANCOCK FDS II EMG  2021-06-30  0.000122      2.0
223  DIMENSIONAL FDS ICVC EMG  2021-06-30  0.001062      2.0
224  DIMENSIONAL FDS PLC II E  2021-06-30  0.000417      2.0

[225 rows x 4 columns]


In [115]:
# Alias (not a copy) of the Y9854K10 query result.
df_Y98 = query_check_Y9854K10

In [116]:
# Rows where one fund's scaled holding (shares / 1,000,000) exceeds 0.1;
# the saved output shows none.
df_Y98[df_Y98.shares > 0.1]

Unnamed: 0,fundname,date,shares,shrout1


#### - iv) for shrout1 = 4.0  ---- decision: drop

In [119]:
# All US holdings rows between 2020-12-01 and 2022-12-31 whose shrout1 is exactly 4.0,
# returning shares / 1,000,000 as 'shares' together with cusip and fundname.
# The name query_check_shrt4 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_shrt4 = """
    SELECT 
    fundname, 
    cusip,
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2020-12-01' AND '2022-12-31'
    AND country = 'UNITED STATES'
    AND shrout1 = 4.0
"""
try:
    query_check_shrt4 = db.raw_sql(query_check_shrt4)
    print(query_check_shrt4)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                       fundname     cusip        date    shares  shrout1
0      LOOMIS SAYLES STRAT MTHL  28367820  2020-12-31  0.000151      4.0
1      BLACKSTONE DIVERSIFIED M  28852N20  2020-12-31  0.040000      4.0
2      WELLS FARGO FACTOR ENHAN  30263Y10  2020-12-31  0.000002      4.0
3      WELLS FARGO FACTOR ENHAN  35352510  2020-12-31  0.000002      4.0
4      WELLS FARGO FACTOR ENHAN  97425010  2020-12-31  0.000002      4.0
...                         ...       ...         ...       ...      ...
44319  JOHN HANCOCK EMG MRKT VA  Y1920C10  2022-12-31  0.002630      4.0
44320  UAM ACADIAN EMER MRKT PO  Y1R60912  2022-12-31  0.009029      4.0
44321  DELAWARE VIP EMERGING MA  Y1R60912  2022-12-31  0.004204      4.0
44322  ACADIAN EMG MRKT EQ UCIT  Y1R60912  2022-12-31  0.001137      4.0
44323  ACADIAN CORE INTL EQUITY  Y3907L10  2022-12-31  0.001014      4.0

[44324 rows x 5 columns]


In [121]:
# Alias (not a copy) of the shrout1 = 4.0 query result.
df_shrt4 = query_check_shrt4

In [137]:
# Number of distinct CUSIPs that have at least one row with shrout1 = 4.0.
unique_cusip_count_4 = df_shrt4['cusip'].nunique()

# Print the count
print(unique_cusip_count_4)

640


In [122]:
# Re-wrap in a DataFrame (the printed query output above already looks like one).
df_shrt4 = pd.DataFrame(df_shrt4)

# Convert the 'date' column to datetime
df_shrt4['date'] = pd.to_datetime(df_shrt4['date'])

# Add a 'Quarter' column holding the calendar-quarter period of each date
df_shrt4['Quarter'] = df_shrt4['date'].dt.to_period('Q')

# Group by 'cusip' and 'Quarter', then sum the scaled 'shares' across funds
grouped_df_shrt4 = df_shrt4.groupby(['cusip', 'Quarter'])['shares'].sum().reset_index()

In [123]:
# Show summed scaled shares per (cusip, Quarter) for rows with shrout1 = 4.0.
grouped_df_shrt4

Unnamed: 0,cusip,Quarter,shares
0,00162Q52,2020Q4,1.229618
1,00162Q53,2020Q4,1.212899
2,00162Q53,2021Q1,1.284202
3,00162Q53,2021Q2,1.246777
4,00162Q53,2021Q4,1.076727
...,...,...,...
2485,Y8071011,2021Q4,0.000015
2486,Y8071011,2022Q1,0.000015
2487,Y8071011,2022Q2,0.003133
2488,Y8071011,2022Q3,0.002600


In [132]:
# Per-fund holdings of CUSIP 00162Q53 between 2010-03-01 and 2018-06-30 (US rows,
# shrout1 > 0), returning shares / 1,000,000 as 'shares' next to the raw shrout1.
# The name query_check_00162Q53 first holds the SQL text and is then rebound to
# the resulting DataFrame inside the try block.
query_check_00162Q53 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2010-03-01' AND '2018-06-30'
    AND cusip = '00162Q53'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    query_check_00162Q53 = db.raw_sql(query_check_00162Q53)
    print(query_check_00162Q53)
except Exception as e:
    # The error is printed, not raised; the name then still holds the SQL text.
    print("Error executing test query:", e)

                   fundname        date    shares  shrout1
0  RIVERFRONT DYNAMIC EQ IN  2018-03-31  0.246026      4.0
1   RIVERFRONT GLOBAL ALLOC  2018-03-31  0.044982      4.0
2  RIVERFRONT MODERATE GR &  2018-03-31  1.001527      4.0
3  RIVERFRONT ASSET ALLOCAT  2018-03-31  0.248274      4.0
4  RIVERFRONT DYNAMIC EQ IN  2018-06-30  0.595913      6.0
5   RIVERFRONT GLOBAL ALLOC  2018-06-30  0.138654      6.0
6  RIVERFRONT MODERATE GR &  2018-06-30  1.384908      6.0
7  RIVERFRONT ASSET ALLOCAT  2018-06-30  0.268634      6.0


In [133]:
# Row-wise ratio of 'shares' to 'shrout1', stored as the ownership column
df_0016['ownership'] = df_0016.shares / df_0016.shrout1

In [134]:
index_keywords = ['INDEX', 'ETF', 'S&P', 'NASDAQ', 'MSCI', 'FTSE', 'TSX', 'PASSIVE', '500', '2000']


def _classify_fund_0016(fundname):
    """Label one fund name as 'Index' or 'Active'.

    A name is 'Index' when any of the following holds:
      * it contains one of ``index_keywords`` and does not start with 'NASDAQ';
      * it contains 'RUSSELL' without starting with it, or contains
        'RUSSELL 2000' (both matched case-insensitively);
      * it contains 'DOW ' (with trailing space) or 'DOW30';
      * it ends with 'IND'.
    Everything else is 'Active'.

    Parameters
    ----------
    fundname : object
        Fund name; converted with ``str()`` before matching.

    Returns
    -------
    str
        'Index' or 'Active'.
    """
    name = str(fundname)
    # The fund names shown in this notebook are upper case (e.g. 'RIVERFRONT
    # DYNAMIC EQ IN'), so a mixed-case 'Russell' test would never match them;
    # compare the Russell rules on an upper-cased copy instead.
    name_upper = name.upper()

    has_keyword = any(keyword in name for keyword in index_keywords)
    if has_keyword and not name.startswith('NASDAQ'):
        return 'Index'
    if 'RUSSELL' in name_upper and not name_upper.startswith('RUSSELL'):
        return 'Index'
    if 'RUSSELL 2000' in name_upper:
        return 'Index'
    if 'DOW ' in name or 'DOW30' in name or name.endswith('IND'):
        return 'Index'
    return 'Active'


df_0016['group'] = df_0016['fundname'].apply(_classify_fund_0016)

# Sum ownership within each group (Index vs Active)
df_grouped_0016 = df_0016.groupby(['group'])['ownership'].sum().reset_index()

In [135]:
# Display total ownership per group
df_grouped_0016

Unnamed: 0,group,ownership
0,Active,7.944886


#### - v) for shrout1 under 10.0 (checked below on rows with shrout1 = 10.0) ---- decision: drop

In [138]:
# All tfn.s12 rows between 2020-12-01 and 2022-12-31 with
# country = 'UNITED STATES' and shrout1 exactly 10.0. shares is divided by
# 1,000,000 in SQL.
# The SQL text has its own name so that re-running this cell never hands a
# DataFrame from a previous run back to raw_sql.
sql_shrt10 = """
    SELECT 
    fundname, 
    cusip,
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2020-12-01' AND '2022-12-31'
    AND country = 'UNITED STATES'
    AND shrout1 = 10.0
"""
try:
    # query_check_shrt10 holds the result DataFrame on success
    query_check_shrt10 = db.raw_sql(sql_shrt10)
    print(query_check_shrt10)
except Exception as e:
    # Best-effort: report the failure instead of stopping the notebook;
    # query_check_shrt10 is not (re)assigned in that case.
    print("Error executing test query:", e)

                       fundname     cusip        date    shares  shrout1
0      WELLS FARGO FACTOR ENHAN  16516774  2020-12-31  0.000004     10.0
1      WELLS FARGO FACTOR ENHAN  17306X10  2020-12-31  0.000007     10.0
2      WELLS FARGO FACTOR ENHAN  19202510  2020-12-31  0.000006     10.0
3      GUGGENHEIM INSIDER SENTI  53122970  2020-12-31  0.027622     10.0
4      NIKKO DOMESTIC EQUITY EN  J0126T10  2020-12-31  0.000254     10.0
...                         ...       ...         ...       ...      ...
75903   DFA DIMENSIONAL EM MKTS  Y6348A10  2022-12-31  0.000024     10.0
75904  TA WLD EX US CORE EQUITY  Y7758C11  2022-12-31  0.001550     10.0
75905  AVANTIS RESPONSIBLE EMER  Y7758C11  2022-12-31  0.000209     10.0
75906  DFA DIMENSIONAL EMERGING  Y7758C11  2022-12-31  0.000020     10.0
75907   DFA DIMENSIONAL EM MKTS  Y7758C11  2022-12-31  0.000012     10.0

[75908 rows x 5 columns]


In [139]:
# Independent copy, so any in-place edit of df_shrt10 leaves the raw query
# result untouched.
df_shrt10 = query_check_shrt10.copy()

In [142]:
# Rows whose 'shares' value (already divided by 1,000,000 in the query) exceeds 0.8
df_shrt10[df_shrt10['shares'] > 0.8]

Unnamed: 0,fundname,cusip,date,shares,shrout1
415,FIDELITY REAL ESTATE INC,16934Q50,2020-12-31,2.359586,10.0
910,FIDELITY LOW PRICED STOC,J8721510,2020-12-31,0.814404,10.0
1190,AIM MODERATE ALLOCATION,46138J72,2020-12-31,1.607791,10.0
1191,AIM INTL ALLOCATION FUND,46138J72,2020-12-31,0.869646,10.0
1267,FIDELITY LOW PRICED STOC,F6374D10,2020-12-31,0.845012,10.0
...,...,...,...,...,...
73946,ISHARES S&P U.S.PREF STK,G1624916,2022-12-31,0.858382,10.0
73952,COHEN & STEERS PREF SECS,G1624916,2022-12-31,1.162652,10.0
73962,FIRST TRUST PREFERRED SE,G1624916,2022-12-31,1.563959,10.0
74733,CITY NATIONAL ROCHDALE S,X6308C10,2022-12-31,6.170637,10.0


In [None]:
# Next cusip to inspect: 16934Q50 (first row of the shares > 0.8 filter above; queried in the next cell)

In [145]:
# Holdings rows from tfn.s12 for cusip 16934Q50 between 2022-03-01 and
# 2023-06-30 (country = 'UNITED STATES', shrout1 > 0). shares is divided by
# 1,000,000 in SQL.
# The SQL text has its own name so that re-running this cell never hands a
# DataFrame from a previous run back to raw_sql.
sql_16934Q50 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2022-03-01' AND '2023-06-30'
    AND cusip = '16934Q50'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    # query_check_16934Q50 holds the result DataFrame on success
    query_check_16934Q50 = db.raw_sql(sql_16934Q50)
    print(query_check_16934Q50)
except Exception as e:
    # Best-effort: report the failure instead of stopping the notebook;
    # query_check_16934Q50 is not (re)assigned in that case.
    print("Error executing test query:", e)

                     fundname        date    shares  shrout1
0    INVESCO VARIABLE RATE PR  2022-03-31  0.000672     10.0
1           FBR BALANCED FUND  2022-03-31  0.001540     10.0
2    FIDELITY REAL ESTATE INC  2022-03-31  2.357586     10.0
3    FIDELITY ADVR STRAT REAL  2022-03-31  0.008700     10.0
4    ISHARES S&P U.S.PREF STK  2022-03-31  0.817866     10.0
..                        ...         ...       ...      ...
99   VANECK PREFERRED SECURIT  2023-06-30  0.228105     10.0
100  ISHARES SPTSX NORTH AMER  2023-06-30  0.006491     10.0
101  GLOBAL X VARIABLE RATE P  2023-06-30  0.099982     10.0
102  AMERICAN CENTURY QUALITY  2023-06-30  0.006423     10.0
103  HOYA CAPITAL HIGH DIVIDE  2023-06-30  0.006320     10.0

[104 rows x 4 columns]


#### - vi) for shrout1 under 15.0 (checked below on rows with shrout1 = 15.0) ---- decision: drop

In [146]:
# All tfn.s12 rows between 2020-12-01 and 2022-12-31 with
# country = 'UNITED STATES' and shrout1 exactly 15.0. shares is divided by
# 1,000,000 in SQL.
# The SQL text has its own name so that re-running this cell never hands a
# DataFrame from a previous run back to raw_sql.
sql_shrt15 = """
    SELECT 
    fundname, 
    cusip,
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2020-12-01' AND '2022-12-31'
    AND country = 'UNITED STATES'
    AND shrout1 = 15.0
"""
try:
    # query_check_shrt15 holds the result DataFrame on success
    query_check_shrt15 = db.raw_sql(sql_shrt15)
    print(query_check_shrt15)
except Exception as e:
    # Best-effort: report the failure instead of stopping the notebook;
    # query_check_shrt15 is not (re)assigned in that case.
    print("Error executing test query:", e)

                       fundname     cusip        date    shares  shrout1
0      QUANTIFIED ALL-CAP EQUIT  02913V10  2020-12-31  0.004475     15.0
1      WELLS FARGO FACTOR ENHAN  02913V10  2020-12-31  0.000010     15.0
2      WELLS FARGO FACTOR ENHAN  03940R10  2020-12-31  0.000009     15.0
3      WELLS FARGO FACTOR ENHAN  06643P10  2020-12-31  0.000010     15.0
4      WELLS FARGO FACTOR ENHAN  06684910  2020-12-31  0.000009     15.0
...                         ...       ...         ...       ...      ...
91023   JOHN HANCOCK FDS II EMG  Y9381K11  2022-12-31  0.000252     15.0
91024  TIAA-CREF INTL SML-CAP E  Y9381K11  2022-12-31  0.004121     15.0
91025  SEGALL BRYANT HAMILL EME  Y9381K11  2022-12-31  0.004614     15.0
91026  ISHARES MSCI INDIA SMALL  Y9381K11  2022-12-31  0.005097     15.0
91027  DFA DIMENSIONAL EMERGING  Y9381K11  2022-12-31  0.000032     15.0

[91028 rows x 5 columns]


In [147]:
# Work on an independent copy: a later cell converts df_shrt15['date'] in
# place, and a plain alias would also alter the raw query result.
df_shrt15 = query_check_shrt15.copy()

In [167]:
# Rows whose 'shares' value (already divided by 1,000,000 in the query) exceeds 2
df_shrt15[df_shrt15['shares'] > 2]

Unnamed: 0,fundname,cusip,date,shares,shrout1
5116,JOHN HANCOCK LIFESTYLE B,47804J85,2020-12-31,3.484861,15.0
5118,JOHN HANCOCK LIFESTYLE G,47804J85,2020-12-31,7.188769,15.0
5706,HIGHLAND CREDIT STRAT FU,65344N10,2020-12-31,13.571131,15.0
8978,WITAN INVESTMENT TRUST P,G6S28V40,2020-12-31,12.711139,15.0
14367,JOHN HANCOCK LIFESTYLE B,47804J85,2021-03-31,3.244242,15.0
14369,JOHN HANCOCK LIFESTYLE G,47804J85,2021-03-31,6.820624,15.0
18017,WITAN INVESTMENT TRUST P,G6S28V40,2021-03-31,12.711139,15.0
27001,WITAN INVESTMENT TRUST P,G6S28V40,2021-06-30,13.007692,15.0
33862,JOHN HANCOCK LIFESTYLE B,47804J85,2021-09-30,3.015976,15.0
33864,JOHN HANCOCK LIFESTYLE G,47804J85,2021-09-30,6.66292,15.0


In [155]:
# Parse 'date' so it can be compared against a Timestamp
df_shrt15['date'] = pd.to_datetime(df_shrt15.date)
# Keep only the rows dated 2022-03-31
df_shrt15_22q1 = df_shrt15[df_shrt15.date == pd.Timestamp('2022-03-31')]

In [156]:
# Display the shrout1 = 15.0 rows dated 2022-03-31
df_shrt15_22q1

Unnamed: 0,fundname,cusip,date,shares,shrout1
49220,FLORIDA STATE BOARD ADMI,01933010,2022-03-31,0.172280,15.0
49221,FLORIDA STATE BOARD ADMI,03940R10,2022-03-31,0.003179,15.0
49222,OPPENHEIMER SENIOR FLOAT,03940R10,2022-03-31,0.002341,15.0
49223,FLORIDA STATE BOARD ADMI,04010E10,2022-03-31,0.008704,15.0
49224,FLORIDA STATE BOARD ADMI,04914Y10,2022-03-31,0.002368,15.0
...,...,...,...,...,...
59319,DIMENSIONAL FDS ICVC EMG,Y976AF10,2022-03-31,0.000578,15.0
59320,DFA DIMENSIONAL EMERGING,Y976AF10,2022-03-31,0.000012,15.0
59321,TA WLD EX US CORE EQUITY,Y9T9HH10,2022-03-31,0.002161,15.0
59322,DIMENSIONAL FDS ICVC EMG,Y9T9HH10,2022-03-31,0.003140,15.0


In [171]:
# Holdings rows from tfn.s12 for cusip G6S28V40 between 2002-03-01 and
# 2024-06-30 (country = 'UNITED STATES', shrout1 > 0). shares is divided by
# 1,000,000 in SQL.
# The SQL text has its own name so that re-running this cell never hands a
# DataFrame from a previous run back to raw_sql.
sql_G6S28V40 = """
    SELECT 
    fundname, 
    fdate AS date, 
    shares / 1000000 AS shares,
    shrout1 AS shrout1
    
    FROM tfn.s12
    
    WHERE fdate BETWEEN '2002-03-01' AND '2024-06-30'
    AND cusip = 'G6S28V40'
    AND country = 'UNITED STATES'
    AND shrout1 > 0
"""
try:
    # query_check_G6S28V40 holds the result DataFrame on success
    query_check_G6S28V40 = db.raw_sql(sql_G6S28V40)
    print(query_check_G6S28V40)
except Exception as e:
    # Best-effort: report the failure instead of stopping the notebook;
    # query_check_G6S28V40 is not (re)assigned in that case.
    print("Error executing test query:", e)

                    fundname        date     shares  shrout1
0   WITAN INVESTMENT TRUST P  2020-03-31  12.601139     15.0
1   WITAN INVESTMENT TRUST P  2020-06-30  12.601139     15.0
2   WITAN INVESTMENT TRUST P  2020-09-30  12.601139     15.0
3   WITAN INVESTMENT TRUST P  2020-12-31  12.711139     15.0
4   WITAN INVESTMENT TRUST P  2021-03-31  12.711139     15.0
5   WITAN INVESTMENT TRUST P  2021-06-30  13.007692     15.0
6   WITAN INVESTMENT TRUST P  2021-09-30  13.007692     15.0
7   WITAN INVESTMENT TRUST P  2021-12-31  13.007692     15.0
8   WITAN INVESTMENT TRUST P  2022-03-31  13.007692     15.0
9   WITAN INVESTMENT TRUST P  2022-06-30  13.007692     15.0
10  WITAN INVESTMENT TRUST P  2022-09-30  13.007692     15.0
11  WITAN INVESTMENT TRUST P  2022-12-31  13.007692     15.0
12  WITAN INVESTMENT TRUST P  2023-03-31  13.007692     15.0
13  WITAN INVESTMENT TRUST P  2023-06-30  13.007692     15.0
14  WITAN INVESTMENT TRUST P  2023-09-30  13.007692     15.0
15  WITAN INVESTMENT TRU