# Formatting and reformatting examples

In [1]:
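# Adds a parent directory to the system path so a local copy of BipartitePandas can be imported.
# Leave this commented out when using the pip-installed package.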
# import sys
# sys.path.append('../../..')

In [2]:
# Import the BipartitePandas package 
# (Make sure you have installed it using pip install bipartitepandas)
import bipartitepandas as bpd
import pandas as pd

In [3]:
# Simulate bipartite labor data to use throughout the examples,
# keeping only the i, j, y and t columns.
# NOTE(review): no seed is fixed in this cell; if the simulation is random,
# the outputs shown below will differ between runs - confirm and pin a seed if possible.
sim_data = (
    bpd.SimBipartite()
    .sim_network()
    [['i', 'j', 'y', 't']]
)
display(sim_data)

Unnamed: 0,i,j,y,t
0,0,145,1.661178,1
1,0,30,-1.193665,2
2,0,30,-0.037325,3
3,0,83,-0.474898,4
4,0,83,2.164489,5
...,...,...,...,...
49995,9999,144,2.299374,1
49996,9999,165,2.512607,2
49997,9999,165,0.681348,3
49998,9999,165,1.910863,4


## Formats

BipartitePandas includes 4 format classes:
- BipartiteLong
- BipartiteLongCollapsed
- BipartiteEventStudy
- BipartiteEventStudyCollapsed

For long data, each row gives a single observation. For collapsed long data, each row gives a single employment spell: consecutive observations of a worker at the same firm are collapsed into one row. For event study data, each row gives the firms and compensation for a single worker over two consecutive periods (this is created from BipartiteLong). For collapsed event study data, each row gives the firms and compensation for a single worker over two consecutive employment spells (this is created from BipartiteLongCollapsed).

In [4]:
# The simulated data is in long form (one observation per row):
# wrap it in a BipartiteLong and clean it in a single chained expression
bdf = bpd.BipartiteLong(sim_data).clean()
display(bdf)
# Print the class of the resulting object
print(type(bdf))

Unnamed: 0,i,j,y,t
0,0,145,1.661178,1
1,0,30,-1.193665,2
2,0,30,-0.037325,3
3,0,83,-0.474898,4
4,0,83,2.164489,5
...,...,...,...,...
49995,9999,144,2.299374,1
49996,9999,165,2.512607,2
49997,9999,165,0.681348,3
49998,9999,165,1.910863,4


<class 'bipartitepandas.bipartitelong.BipartiteLong'>


## Initializing from different formats

In [5]:
# From long
# Copy the long data into a plain Pandas dataframe...
long_df = pd.DataFrame(bdf, copy=True)
# ...then construct a BipartiteLong from that dataframe and clean it
long_bdf = bpd.BipartiteLong(long_df).clean()
display(long_bdf)
# Print the class of the resulting object
print(type(long_bdf))

Unnamed: 0,i,j,y,t
0,0,145,1.661178,1
1,0,30,-1.193665,2
2,0,30,-0.037325,3
3,0,83,-0.474898,4
4,0,83,2.164489,5
...,...,...,...,...
49995,9999,144,2.299374,1
49996,9999,165,2.512607,2
49997,9999,165,0.681348,3
49998,9999,165,1.910863,4


<class 'bipartitepandas.bipartitelong.BipartiteLong'>


In [6]:
# From collapsed long
# Convert the long data to collapsed long form and copy it into a plain Pandas dataframe...
collapsed_long_df = pd.DataFrame(bdf.to_collapsed_long(), copy=True)
# ...then construct a BipartiteLongCollapsed from that dataframe and clean it
collapsed_long_bdf = bpd.BipartiteLongCollapsed(collapsed_long_df).clean()
display(collapsed_long_bdf)
# Print the class of the resulting object
print(type(collapsed_long_bdf))

Unnamed: 0,i,j,y,t1,t2,w,m
0,0,145,1.661178,1,1,1,1
1,0,30,-0.615495,2,3,2,1
2,0,83,0.844795,4,5,2,1
3,1,46,0.459030,1,2,2,1
4,1,50,-0.008153,3,5,3,1
...,...,...,...,...,...,...,...
29771,9998,34,-0.825938,3,4,2,1
29772,9998,147,-0.450058,5,5,1,1
29773,9999,144,2.299374,1,1,1,1
29774,9999,165,1.701606,2,4,3,1


<class 'bipartitepandas.bipartitelongcollapsed.BipartiteLongCollapsed'>


In [7]:
# From event study
# Convert the long data to event study form and copy it into a plain Pandas dataframe...
es_df = pd.DataFrame(bdf.to_eventstudy(), copy=True)
# ...then construct a BipartiteEventStudy from that dataframe and clean it
es_bdf = bpd.BipartiteEventStudy(es_df).clean()
display(es_bdf)
# Print the class of the resulting object
print(type(es_bdf))

Unnamed: 0,i,j1,j2,y1,y2,t1,t2,m
0,14,0,0,-2.034758,-2.034758,1,1,0
1,14,0,0,-0.314693,-0.314693,2,2,0
2,14,0,0,-1.677868,-1.677868,3,3,0
3,14,0,0,-2.425955,-2.425955,4,4,0
4,14,0,0,-3.253415,-3.253415,5,5,0
...,...,...,...,...,...,...,...,...
40680,9998,34,147,-1.059082,-0.450058,4,5,1
40681,9999,144,165,2.299374,2.512607,1,2,1
40682,9999,165,165,2.512607,0.681348,2,3,1
40683,9999,165,165,0.681348,1.910863,3,4,1


<class 'bipartitepandas.bipartiteeventstudy.BipartiteEventStudy'>


In [8]:
# From collapsed event study
# (the long data is collapsed first, then converted to event study form)
collapsed_es_df = pd.DataFrame(bdf.to_collapsed_long().to_eventstudy(), copy=True) # Reset our data into a Pandas dataframe
# Now initialize from collapsed event study
collapsed_es_bdf = bpd.BipartiteEventStudyCollapsed(collapsed_es_df)
collapsed_es_bdf = collapsed_es_bdf.clean()
display(collapsed_es_bdf)
# Note the type
print(type(collapsed_es_bdf))

Unnamed: 0,i,j1,j2,y1,y2,t11,t12,t21,t22,w1,w2,m
0,14,0,0,-1.941338,-1.941338,1,5,1,5,5.0,5,0
1,16,111,111,0.961920,0.961920,1,5,1,5,5.0,5,0
2,31,149,149,1.810867,1.810867,1,5,1,5,5.0,5,0
3,43,148,148,1.870104,1.870104,1,5,1,5,5.0,5,0
4,49,144,144,0.126350,0.126350,1,5,1,5,5.0,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...
20456,9998,19,5,-2.218008,-2.844433,1,1,2,2,1.0,1,1
20457,9998,5,34,-2.844433,-0.825938,2,2,3,4,1.0,2,1
20458,9998,34,147,-0.825938,-0.450058,3,4,5,5,2.0,1,1
20459,9999,144,165,2.299374,1.701606,1,1,2,4,1.0,3,1


<class 'bipartitepandas.bipartiteeventstudycollapsed.BipartiteEventStudyCollapsed'>


## Converting formats

In [9]:
# While our original data is long, we might want it to be in event study form
# (each row gives two consecutive observations; in the displayed sample, rows with
# m=0 show the same firm, income and period in both the 1 and 2 columns).
# Note: `bdf` is rebound to the converted object, so the cells below operate on
# the event study data - run the cells in order.
bdf = bdf.to_eventstudy()
display(bdf)
# Note the type
print(type(bdf))

Unnamed: 0,i,j1,j2,y1,y2,t1,t2,m
0,14,0,0,-2.034758,-2.034758,1,1,0
1,14,0,0,-0.314693,-0.314693,2,2,0
2,14,0,0,-1.677868,-1.677868,3,3,0
3,14,0,0,-2.425955,-2.425955,4,4,0
4,14,0,0,-3.253415,-3.253415,5,5,0
...,...,...,...,...,...,...,...,...
40680,9998,34,147,-1.059082,-0.450058,4,5,1
40681,9999,144,165,2.299374,2.512607,1,2,1
40682,9999,165,165,2.512607,0.681348,2,3,1
40683,9999,165,165,0.681348,1.910863,3,4,1


<class 'bipartitepandas.bipartiteeventstudy.BipartiteEventStudy'>


In [10]:
# We can also use event study data to retrieve cross section data:
# - cs=1 gives y1 as y1 for both stayers and movers
# - cs=0 gives y2 as y1 for only movers
# This allows (almost) all income data to be accessed from the y1 column.
# Note that for movers, the last observation for each worker is not available
# without manipulation, as it is shifted to the y2 column. Also note that the
# y1 column contains duplicates for all mover incomes, except for the first period.
display(bdf.get_cs())

Unnamed: 0,i,j1,j2,y1,y2,t1,t2,m,cs
0,14,0,0,-2.034758,-2.034758,1,1,0,1
1,14,0,0,-0.314693,-0.314693,2,2,0,1
2,14,0,0,-1.677868,-1.677868,3,3,0,1
3,14,0,0,-2.425955,-2.425955,4,4,0,1
4,14,0,0,-3.253415,-3.253415,5,5,0,1
...,...,...,...,...,...,...,...,...,...
77940,9998,147,34,-0.450058,-1.059082,5,4,1,0
77941,9999,165,144,2.512607,2.299374,2,1,1,0
77942,9999,165,165,0.681348,2.512607,3,2,1,0
77943,9999,165,165,1.910863,0.681348,4,3,1,0


In [11]:
# Maybe we want to convert back into long form.
# `bdf` holds event study data at this point and is rebound to the long result.
# Note that the displayed result carries an m column that the original long data did not show.
bdf = bdf.to_long()
display(bdf)
# Note the type
print(type(bdf))

Unnamed: 0,i,j,y,t,m
0,0,145,1.661178,1,1
1,0,30,-1.193665,2,1
2,0,30,-0.037325,3,1
3,0,83,-0.474898,4,1
4,0,83,2.164489,5,1
...,...,...,...,...,...
49995,9999,144,2.299374,1,1
49996,9999,165,2.512607,2,1
49997,9999,165,0.681348,3,1
49998,9999,165,1.910863,4,1


<class 'bipartitepandas.bipartitelong.BipartiteLong'>


In [12]:
# Now suppose we want to collapse by employment spells (so any consecutive
# observations with the same worker in the same firm are collapsed into 1 observation).
# `bdf` is rebound to the collapsed result, so the cells below operate on collapsed data.
bdf = bdf.to_collapsed_long()
display(bdf)
# Note the type
print(type(bdf))

Unnamed: 0,i,j,y,t1,t2,w,m
0,0,145,1.661178,1,1,1,1
1,0,30,-0.615495,2,3,2,1
2,0,83,0.844795,4,5,2,1
3,1,46,0.459030,1,2,2,1
4,1,50,-0.008153,3,5,3,1
...,...,...,...,...,...,...,...
29771,9998,34,-0.825938,3,4,2,1
29772,9998,147,-0.450058,5,5,1,1
29773,9999,144,2.299374,1,1,1,1
29774,9999,165,1.701606,2,4,3,1


<class 'bipartitepandas.bipartitelongcollapsed.BipartiteLongCollapsed'>


In [13]:
# We can then check out the event study using collapsed data.
# `bdf` holds collapsed long data at this point; the printed type below shows that
# the conversion yields a BipartiteEventStudyCollapsed.
bdf = bdf.to_eventstudy()
display(bdf)
# Note the type
print(type(bdf))

Unnamed: 0,i,j1,j2,y1,y2,t11,t12,t21,t22,w1,w2,m
0,14,0,0,-1.941338,-1.941338,1,5,1,5,5.0,5,0
1,16,111,111,0.961920,0.961920,1,5,1,5,5.0,5,0
2,31,149,149,1.810867,1.810867,1,5,1,5,5.0,5,0
3,43,148,148,1.870104,1.870104,1,5,1,5,5.0,5,0
4,49,144,144,0.126350,0.126350,1,5,1,5,5.0,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...
20456,9998,19,5,-2.218008,-2.844433,1,1,2,2,1.0,1,1
20457,9998,5,34,-2.844433,-0.825938,2,2,3,4,1.0,2,1
20458,9998,34,147,-0.825938,-0.450058,3,4,5,5,2.0,1,1
20459,9999,144,165,2.299374,1.701606,1,1,2,4,1.0,3,1


<class 'bipartitepandas.bipartiteeventstudycollapsed.BipartiteEventStudyCollapsed'>


In [14]:
# We can then go back to collapsed long.
# `bdf` holds collapsed event study data at this point; the printed type below shows
# that the conversion yields a BipartiteLongCollapsed.
# Note that in the displayed result the w column is shown as float (e.g. 1.0) after the round trip.
bdf = bdf.to_long()
display(bdf)
# Note the type
print(type(bdf))

Unnamed: 0,i,j,y,t1,t2,w,m
0,0,145,1.661178,1,1,1.0,1
1,0,30,-0.615495,2,3,2.0,1
2,0,83,0.844795,4,5,2.0,1
3,1,46,0.459030,1,2,2.0,1
4,1,50,-0.008153,3,5,3.0,1
...,...,...,...,...,...,...,...
29771,9998,34,-0.825938,3,4,2.0,1
29772,9998,147,-0.450058,5,5,1.0,1
29773,9999,144,2.299374,1,1,1.0,1
29774,9999,165,1.701606,2,4,3.0,1


<class 'bipartitepandas.bipartitelongcollapsed.BipartiteLongCollapsed'>
