# How to handle duplicate financial statements / entries

This is a scratch notebook for the data processing code


Some things to handle:
1) Multiple reports filed and amended
2) How many filings do I have for 2023? The count seems way too small; maybe I'm missing some data?

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.core.pylabtools import figsize

import seaborn as sns
import plotly.express as px

import numpy as np
import pandas as pd
import polars as pl

import statsmodels.formula.api as smf

In [17]:
# Execute fetch_data.py and pull its top-level names into this kernel.
# Presumably this is where `get_entries` (used in the cells below) comes from — confirm in fetch_data.py.
%run fetch_data.py

In [28]:
# Load the 2021 entries tagged `AssetsCurrent` into `data`.
# Per the saved output, the call also echoes the SQL text and its bind parameters.
data = get_entries(2021, ['AssetsCurrent'])

 SELECT * FROM
                (SELECT adsh, tag, strftime('%Y', ddate) AS data_year, uom, value,
                 footnote FROM num
                 WHERE (data_year=:year) AND (tag = :tag0)
                ) AS number_table
                INNER JOIN
                (SELECT adsh, cik, name, period AS period_filed, prevrpt
                    FROM sub WHERE fy=:year) AS filedata
                ON filedata.adsh == number_table.adsh
            
{'tag0': 'AssetsCurrent', 'year': '2021'}


In [29]:
# Display the frame currently bound to `data` (the rendered table elides the middle rows).
data

Unnamed: 0,adsh,tag,data_year,uom,value,footnote,adsh.1,cik,name,period_filed,prevrpt
0,0001838862-21-000028,AssetsCurrent,2021-01-01,USD,1.699724e+09,,0001838862-21-000028,12659,H&R BLOCK INC,2021-04-30,False
1,0001564590-21-033645,AssetsCurrent,2021-01-01,USD,6.001000e+09,,0001564590-21-033645,1002047,"NETAPP, INC.",2021-04-30,False
2,0000014693-21-000091,AssetsCurrent,2021-01-01,USD,3.917000e+09,,0000014693-21-000091,14693,BROWN FORMAN CORP,2021-04-30,False
3,0001564590-21-033616,AssetsCurrent,2021-01-01,USD,5.556700e+10,,0001564590-21-033616,1341439,ORACLE CORP,2021-05-31,False
4,0001564590-21-033352,AssetsCurrent,2021-01-01,USD,2.682530e+08,,0001564590-21-033352,1092796,"SMITH & WESSON BRANDS, INC.",2021-04-30,False
...,...,...,...,...,...,...,...,...,...,...,...
5449,0001213900-23-050974,AssetsCurrent,2021-01-01,USD,2.762470e+07,,0001213900-23-050974,50292,IEH CORP,2022-03-31,False
5450,0001410578-23-001462,AssetsCurrent,2021-01-01,USD,3.720000e+05,,0001410578-23-001462,786947,"ACURA PHARMACEUTICALS, INC",2021-12-31,False
5451,0001410578-23-002116,AssetsCurrent,2021-01-01,USD,2.114040e+05,,0001410578-23-002116,831489,SCORES HOLDING CO INC,2021-12-31,False
5452,0001477932-23-006679,AssetsCurrent,2021-01-01,USD,1.022000e+04,,0001477932-23-006679,1753681,ELITE PERFORMANCE HOLDING CORP,2021-12-31,False


In [12]:
# Order the rows alphabetically by company name to make repeated filers easy to spot.
# NOTE(review): the saved output shows `Revenues` rows and an older execution count,
# so it reflects an earlier `data` than the AssetsCurrent load above — re-run top to bottom.
data.sort_values(by='name')

Unnamed: 0,adsh,tag,data_year,uom,value,footnote,adsh.1,cik,name,period_filed,prevrpt
2873,0001477932-22-002081,Revenues,2021-01-01,USD,34069.0,,0001477932-22-002081,1877461,1606 CORP.,2021-12-31,False
1432,0001213900-22-016393,Revenues,2021-01-01,USD,362314000.0,,0001213900-22-016393,1810140,1847 GOEDEKER INC.,2021-12-31,False
1599,0001213900-22-016805,Revenues,2021-01-01,USD,30660984.0,,0001213900-22-016805,1599407,1847 HOLDINGS LLC,2021-12-31,False
2758,0000950170-22-010852,Revenues,2021-01-01,USD,243920000.0,,0000950170-22-010852,1804591,23ANDME HOLDING CO.,2022-03-31,False
2724,0001860782-22-000005,Revenues,2021-01-01,USD,54522000.0,,0001860782-22-000005,1860782,"2SEVENTY BIO, INC.",2021-12-31,False
...,...,...,...,...,...,...,...,...,...,...,...
1833,0001410578-22-000453,Revenues,2021-01-01,USD,40367000.0,,0001410578-22-000453,846475,ZYNEX INC,2021-12-31,False
1830,0001410578-22-000453,Revenues,2021-01-01,USD,24127000.0,,0001410578-22-000453,846475,ZYNEX INC,2021-12-31,False
1832,0001410578-22-000453,Revenues,2021-01-01,USD,34785000.0,,0001410578-22-000453,846475,ZYNEX INC,2021-12-31,False
148,0001213900-21-047343,Revenues,2021-01-01,USD,,,0001213900-21-047343,1558740,ZYROX MINING INTERNATIONAL INC,2021-05-31,False


# Edge cases:

I have a feeling that there will be a lot of edge cases with this data, so I'll document them here.

### Multiple revenue values

1. Note that the smaller values sum to the largest value, so they appear to be a breakdown of total revenues.
2. However, this breakdown is not the same as the one seen in the original document: https://www.sec.gov/ixviewer/ix.html?doc=/Archives/edgar/data/846475/000141057822000453/zyxi-20211231x10k.htm

In [31]:
# Rebind `data` to the 2021 `Revenues` entries.
data = get_entries(2021, ['Revenues'])

# Show only the rows reported under the name 'ZYNEX INC' (several values share one filing).
data.loc[data['name'] == 'ZYNEX INC']

 SELECT * FROM
                (SELECT adsh, tag, strftime('%Y', ddate) AS data_year, uom, value,
                 footnote FROM num
                 WHERE (data_year=:year) AND (tag = :tag0)
                ) AS number_table
                INNER JOIN
                (SELECT adsh, cik, name, period AS period_filed, prevrpt
                    FROM sub WHERE fy=:year) AS filedata
                ON filedata.adsh == number_table.adsh
            
{'tag0': 'Revenues', 'year': '2021'}


Unnamed: 0,adsh,tag,data_year,uom,value,footnote,adsh.1,cik,name,period_filed,prevrpt
1830,0001410578-22-000453,Revenues,2021-01-01,USD,24127000.0,,0001410578-22-000453,846475,ZYNEX INC,2021-12-31,False
1831,0001410578-22-000453,Revenues,2021-01-01,USD,31022000.0,,0001410578-22-000453,846475,ZYNEX INC,2021-12-31,False
1832,0001410578-22-000453,Revenues,2021-01-01,USD,34785000.0,,0001410578-22-000453,846475,ZYNEX INC,2021-12-31,False
1833,0001410578-22-000453,Revenues,2021-01-01,USD,40367000.0,,0001410578-22-000453,846475,ZYNEX INC,2021-12-31,False
1834,0001410578-22-000453,Revenues,2021-01-01,USD,130301000.0,,0001410578-22-000453,846475,ZYNEX INC,2021-12-31,False


In [24]:
# Sanity check: if the smaller Zynex values are a breakdown of the largest one,
# the sum of all rows is twice the total, so halving it should give the total.
data.loc[data['name'] == 'ZYNEX INC', 'value'].sum() / 2

130301000.0

In [30]:
# Scratch check that two hand-copied accession strings match.
# Presumably one is from the EDGAR URL and the other is the adsh with dashes removed — confirm.
'000141057822000453' == '000141057822000453'

True