In [66]:
import pandas as pd

### Refugee stats for each bar
- Avg 9 years:  67,909 
- 2018:  22,491 (45,000 cap)
- 2019:  30,000 cap

In [116]:
# Create a basic dataframe for each bar with 1 record per person
df_a = pd.DataFrame(data={'bar_index':list(range(67909))})
df_b = pd.DataFrame(data={'bar_index':list(range(45000))})
df_c = pd.DataFrame(data={'bar_index':list(range(30000))})

In [119]:
# Add a label for each set
df_a['bar_label'] = 'bar_a'
df_b['bar_label'] = 'bar_b'
df_c['bar_label'] = 'bar_c'

In [133]:
# Set a bit flag for whether a record represents a refugee or not
df_a['refugee_flag'] = 1
df_b['refugee_flag'] = df_b['bar_index'].apply(lambda x: 1 if x <= 22491 else 0)
df_c['refugee_flag'] = 0

In [124]:
# Set a different number for each different group, in case we want to use a different color for each
df_a['color_group'] = 1
df_b['color_group'] = df_b['bar_index'].apply(lambda x: 2 if x <= 22491 else 3)
df_c['color_group'] = 4

In [154]:
# Change shape of bar by changing parameters
bar_width = 100
bar_space = 0.1 # must be in tenths, space between bars

# First bar starts at x=0
a_start = 0
# 2nd bar starts at this point
b_start = (1 + bar_space) * bar_width
# 3rd bar starts at this point
c_start = (2 + 2*bar_space) * bar_width
print(a_start, b_start, c_start)

0 110.00000000000001 220.00000000000003


In [155]:
# Set x position for each record
df_a['x_pos'] = df_a['bar_index'].apply(lambda x: x % bar_width + a_start)
df_b['x_pos'] = df_b['bar_index'].apply(lambda x: x % bar_width + b_start)
df_c['x_pos'] = df_c['bar_index'].apply(lambda x: x % bar_width + c_start)

In [156]:
# Set y position for each record
df_a['y_pos'] = df_a['bar_index'].apply(lambda x: x // bar_width)
df_b['y_pos'] = df_b['bar_index'].apply(lambda x: x // bar_width)
df_c['y_pos'] = df_c['bar_index'].apply(lambda x: x // bar_width)

In [157]:
# Union the 3 datasets
df_bars = pd.concat([df_a, df_b, df_c])
df_bars.reset_index(drop=True)
df_bars.head()

Unnamed: 0,bar_index,bar_label,refugee_flag,color_group,x_pos,y_pos
0,0,bar_a,1,1,0.0,0
1,1,bar_a,1,1,1.0,0
2,2,bar_a,1,1,2.0,0
3,3,bar_a,1,1,3.0,0
4,4,bar_a,1,1,4.0,0


In [158]:
# Confirm the correct number of records
df_bars.shape

(142909, 6)

In [159]:
# Save to csv
df_bars.to_csv('iron_viz_bars.csv')