Simulating OSS DATA

In [19]:
import pandas as pd
import numpy as np
from sklearn.utils import resample

# Assuming you have a DataFrame 'original_data' with your existing data
# For demonstration purposes, I'll create a simplified example DataFrame
data = {
    'Date (week ending)': ['30/07/2023', '30/07/2023', '30/07/2023', '30/07/2023', '30/07/2023', '30/07/2023', '23/07/2023', '23/07/2023', '23/07/2023', '23/07/2023'],
    'Location': ['Bama', 'Dikwa', 'Mafa', 'Katagum', 'Maduganari', 'Yola North', 'Monguno', 'Mobbar', 'Gwoza', 'Dikwa'],
    'States': ['Borno', 'Borno', 'Borno', 'Bauchi', 'Borno', 'Adamawa', 'Borno', 'Borno', 'Borno', 'Borno'],
    'Category': ['Armed conflict', 'Armed conflict', 'Armed conflict', 'Civil unrest', 'Criminality', 'Criminality', 'Armed conflict', 'Military operations', 'Terrorism', 'Farmers/IDP relocation'],
    'OSS Highlights': [
        'Armed attacks were reported in Bama, Dikwa, and Mafa, LGA, Borno State.',
        'Armed attacks were reported in Bama, Dikwa, and Mafa, LGA, Borno State.',
        'Armed attacks were reported in Bama, Dikwa, and Mafa, LGA, Borno State.',
        '',  # Replace empty string randomly
        '',
        '47 arrested in Adamawa as youth carrying machetes and baton sticks vandalize Adamawa state government house, airport, warehouses and shops in Yola North and South.',
        'MOPOL stabbed to death at the Artillery Mammy Market in Maiduguri by a suspected drug dealer.',
        '',  # Replace empty string randomly
        '',
        ''
    ],
    'Casualty': np.random.randint(0, 21, size=10),
    'Injuries': np.random.randint(0, 11, size=10),
    'Fatality': np.random.randint(0, 16, size=10),
    'Other': np.random.randint(0, 48, size=10)
}

original_data = pd.DataFrame(data)

# Replace empty strings in 'OSS Highlights' randomly with existing non-empty values
non_empty_highlights = original_data.loc[original_data['OSS Highlights'] != '', 'OSS Highlights']
empty_indices = original_data[original_data['OSS Highlights'] == ''].index

original_data.loc[empty_indices, 'OSS Highlights'] = np.random.choice(non_empty_highlights, len(empty_indices))

# Data augmentation using bootstrapping
augmented_data = pd.concat([resample(original_data) for _ in range(5000 // len(original_data))])

# Optionally, you can reset the index of the augmented DataFrame
augmented_data.reset_index(drop=True, inplace=True)

# Now 'augmented_data' contains your simulated dataset with 5000 rows

# Check the first few rows of the augmented dataset
print(augmented_data.head())


  Date (week ending)    Location  States        Category  \
0         30/07/2023        Mafa   Borno  Armed conflict   
1         30/07/2023  Maduganari   Borno     Criminality   
2         30/07/2023        Bama   Borno  Armed conflict   
3         30/07/2023     Katagum  Bauchi    Civil unrest   
4         30/07/2023        Bama   Borno  Armed conflict   

                                      OSS Highlights  Casualty  Injuries  \
0  Armed attacks were reported in Bama, Dikwa, an...         9        10   
1  MOPOL stabbed to death at the Artillery Mammy ...        15         5   
2  Armed attacks were reported in Bama, Dikwa, an...         8         6   
3  Armed attacks were reported in Bama, Dikwa, an...        12         9   
4  Armed attacks were reported in Bama, Dikwa, an...         8         6   

   Fatality  Other  
0        12     34  
1        11     30  
2         2     14  
3         6     16  
4         2     14  


In [12]:
#checks for nan values
null_value = augmented_data.isnull().any()
print(f'columns with nan values are:{null_value}') #prints columns with Nan values

columns with nan values are:Date (week ending)    False
Location              False
States                False
Category              False
OSS Highlights        False
Casualty              False
Injuries              False
Fatality              False
Other                 False
dtype: bool


In [13]:
augmented_data

Unnamed: 0,Date (week ending),Location,States,Category,OSS Highlights,Casualty,Injuries,Fatality,Other
0,30/07/2023,Mafa,Borno,Armed conflict,"Armed attacks were reported in Bama, Dikwa, an...",3,1,14,40
1,23/07/2023,Mobbar,Borno,Military operations,MOPOL stabbed to death at the Artillery Mammy ...,1,5,9,40
2,30/07/2023,Bama,Borno,Armed conflict,"Armed attacks were reported in Bama, Dikwa, an...",4,8,3,38
3,23/07/2023,Gwoza,Borno,Terrorism,47 arrested in Adamawa as youth carrying mache...,15,5,0,28
4,30/07/2023,Maduganari,Borno,Criminality,"Armed attacks were reported in Bama, Dikwa, an...",10,5,8,46
...,...,...,...,...,...,...,...,...,...
4995,23/07/2023,Gwoza,Borno,Terrorism,47 arrested in Adamawa as youth carrying mache...,15,5,0,28
4996,23/07/2023,Dikwa,Borno,Farmers/IDP relocation,"Armed attacks were reported in Bama, Dikwa, an...",1,1,9,21
4997,30/07/2023,Yola North,Adamawa,Criminality,47 arrested in Adamawa as youth carrying mache...,10,0,1,46
4998,23/07/2023,Monguno,Borno,Armed conflict,MOPOL stabbed to death at the Artillery Mammy ...,6,3,1,1


In [15]:
# Export to CSV
augmented_data.to_csv('simulated_OSS1_data.csv', index=False)

CCCM DATA SIMULATION

In [None]:
import pandas as pd
import numpy as np
from sklearn.utils import resample

# Simulated data
data1 = {
    'Date (week ending)': [
        '07/07/2023', '07/07/2023', '07/07/2023', '07/07/2023', '07/07/2023',
        '07/07/2023', '07/07/2023', '07/07/2023'
    ],
    'State': [
        'Borno', 'Borno', 'Borno', 'Borno', 'Borno',
        'Adamawa', 'Adamawa', 'Adamawa'
    ],
    'Location': [
        'Maiduguri', 'Bama', 'Damboa', 'Dikwa', 'Mongonu',
        'Yola', 'Demsa', 'Fufore'
    ],
    'Individuals in IDP Camps and Reception Centres reached with hygiene promotion, fire safety, flood mitigation and messages to prevent cholera':
        np.random.randint(100, 1000, size=8),
    'Individuals sensitized, on the benefits of having women and girls in camp governance structure; the GBV aspect of sexual exploitation and abuse; and intimate partner violence and denial of resources':
        np.random.randint(100, 1000, size=8),
    'partially damaged shelters reinforced':
        np.random.randint(10, 100, size=8),
    'partially damaged latrines/showers rehabilitated':
        np.random.randint(10, 100, size=8),
    'CFM received':
        np.random.randint(100, 1000, size=8),
    'CFM resolved':
        np.random.randint(100, 1000, size=8)
}

# Create a DataFrame
original_data1 = pd.DataFrame(data1)

# Data augmentation using bootstrapping
augmented_data1 = pd.concat([resample(original_data1) for _ in range(5000 // len(original_data1))])

# Optionally, you can reset the index of the augmented DataFrame
augmented_data1.reset_index(drop=True, inplace=True)

# Display the simulated data
print(augmented_data1)


In [21]:
augmented_data1

Unnamed: 0,Date (week ending),State,Location,"Individuals in IDP Camps and Reception Centres reached with hygiene promotion, fire safety, flood mitigation and messages to prevent cholera","Individuals sensitized, on the benefits of having women and girls in camp governance structure; the GBV aspect of sexual exploitation and abuse; and intimate partner violence and denial of resources",partially damaged shelters reinforced,partially damaged latrines/showers rehabilitated,CFM received,CFM resolved
0,07/07/2023,Adamawa,Yola,226,170,68,71,131,361
1,07/07/2023,Adamawa,Yola,226,170,68,71,131,361
2,07/07/2023,Adamawa,Fufore,400,786,35,67,928,517
3,07/07/2023,Adamawa,Fufore,400,786,35,67,928,517
4,07/07/2023,Borno,Damboa,750,453,75,28,681,665
...,...,...,...,...,...,...,...,...,...
4995,07/07/2023,Borno,Dikwa,498,688,14,16,632,319
4996,07/07/2023,Adamawa,Yola,226,170,68,71,131,361
4997,07/07/2023,Adamawa,Demsa,577,582,49,65,582,621
4998,07/07/2023,Adamawa,Yola,226,170,68,71,131,361


In [22]:
# Export to CSV
augmented_data1.to_csv('simulated_CCCM_data.csv', index=False)

WASH DATA SIMULATION

In [25]:
import pandas as pd
import numpy as np
from sklearn.utils import resample

# Simulated data
data2 = {
    'Date (week ending)': [
        '30/07/2023', '30/07/2023', '30/07/2023', '30/07/2023', '30/07/2023',
        '30/07/2023', '30/07/2023', '30/07/2023', '23/07/2023', '23/07/2023',
        '23/07/2023', '23/07/2023', '23/07/2023', '23/07/2023', '23/07/2023',
        '23/07/2023', '23/07/2023', '30/07/2023', '30/07/2023', '30/07/2023',
        '30/07/2023', '30/07/2023', '30/07/2023', '30/07/2023', '30/07/2023',
        '30/07/2023', '30/07/2023'
    ],
    'State': [
        'Borno', 'Borno', 'Borno', 'Borno', 'Borno',
        'Borno', 'Borno', 'Borno', 'Borno', 'Borno',
        'Borno', 'Borno', 'Borno', 'Adamawa', 'Borno',
        'Borno', 'Adamawa', 'Borno', 'Borno', 'Borno'
    ],
    'LGA': [
        'Bama', 'Bama', 'Damasak', 'Dikwa', 'Mafa',
        'Pulka', 'Yola', 'Borno', 'Bama', 'Damasak',
        'Dikwa', 'Bama', 'Pulka', 'Kaga', 'Yola North',
        'Maiduguri', 'Bama', 'Mobbar', 'Dikwa', 'Banki'
    ],
    'Location': [
        'Bama', 'Banki', 'Damasak', 'Dikwa', 'Mafa',
        'Pulka', 'Yola', 'Borno', 'Bama', 'Damasak',
        'Dikwa', 'Banki', 'Pulka', 'Kaga', 'Yola North',
        'Maiduguri', 'Bama', 'Mobbar', 'Dikwa', 'Bama'
    ],
    'Litres of water provided to household':
        np.random.randint(100, 1000, size=20),
    'Litres of water provided to individuals':
        np.random.randint(100, 1000, size=20),
    'latrines maintained':
        np.random.randint(50, 500, size=20),
    'showers maintained':
        np.random.randint(50, 500, size=20),
    'latrines constructed':
        np.random.randint(50, 500, size=20),
    'showers constructed':
        np.random.randint(50, 500, size=20),
    'water sources constructed':
        np.random.randint(50, 500, size=20),
    'latrines desludged':
        np.random.randint(50, 500, size=20),
    'individuals reached with key hygiene promotion messages':
        np.random.randint(100, 1000, size=20),
    'children reached with Hygiene club activities':
        np.random.randint(50, 500, size=20),
    'individuals reached in radio listening sessions on hygiene promotion messages':
        np.random.randint(50, 500, size=20),
    'persons participated in sanitation activities':
        np.random.randint(50, 500, size=20),
    'Girls and women reached in Menstrual Hygiene Management sessions':
        np.random.randint(50, 500, size=20),
    'hygiene kits distributed':
        np.random.randint(50, 500, size=20),
    'replenishment kits distributed':
        np.random.randint(50, 500, size=20),
    'WATSAN members trained':
        np.random.randint(50, 500, size=20),
}

# Check the lengths of the arrays
array_lengths = [len(value) for value in data2.values()]
print("Array Lengths:", array_lengths)

# Extend the other arrays to match the length of 'Date (week ending)'
extended_length = len(data2['Date (week ending)'])
for key in data2.keys():
    data2[key] = np.concatenate([data2[key], np.random.choice(data2[key], extended_length - len(data2[key]))])

# Check the lengths of the arrays
array_lengths = [len(value) for value in data2.values()]

print("Array Lengths:", array_lengths)
# Create a DataFrame
original_data2 = pd.DataFrame(data2)

# Data augmentation using bootstrapping
augmented_data2 = pd.concat([resample(original_data2) for _ in range(5000 // len(original_data2))])

# Optionally, you can reset the index of the augmented DataFrame
augmented_data2.reset_index(drop=True, inplace=True)

# Display the simulated data
print(augmented_data2)

Array Lengths: [27, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]
Array Lengths: [27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27]
     Date (week ending)  State         LGA    Location  \
0            30/07/2023  Borno       Dikwa   Maiduguri   
1            23/07/2023  Borno        Bama        Bama   
2            30/07/2023  Borno       Dikwa       Dikwa   
3            30/07/2023  Borno       Dikwa       Dikwa   
4            30/07/2023  Borno     Damasak     Damasak   
...                 ...    ...         ...         ...   
4990         23/07/2023  Borno       Dikwa       Dikwa   
4991         23/07/2023  Borno        Bama       Banki   
4992         23/07/2023  Borno  Yola North  Yola North   
4993         30/07/2023  Borno       Dikwa        Yola   
4994         30/07/2023  Borno     Damasak     Damasak   

      Litres of water provided to household  \
0                                       128   
1                

In [26]:
augmented_data2

Unnamed: 0,Date (week ending),State,LGA,Location,Litres of water provided to household,Litres of water provided to individuals,latrines maintained,showers maintained,latrines constructed,showers constructed,water sources constructed,latrines desludged,individuals reached with key hygiene promotion messages,children reached with Hygiene club activities,individuals reached in radio listening sessions on hygiene promotion messages,persons participated in sanitation activities,Girls and women reached in Menstrual Hygiene Management sessions,hygiene kits distributed,replenishment kits distributed,WATSAN members trained
0,30/07/2023,Borno,Dikwa,Maiduguri,128,376,199,92,273,150,272,94,561,147,271,201,360,154,80,128
1,23/07/2023,Borno,Bama,Bama,526,560,449,119,81,471,327,94,642,297,269,457,211,458,421,128
2,30/07/2023,Borno,Dikwa,Dikwa,326,246,103,285,365,245,377,467,438,77,205,124,326,326,449,128
3,30/07/2023,Borno,Dikwa,Dikwa,326,246,103,285,365,245,377,467,438,77,205,124,326,326,449,128
4,30/07/2023,Borno,Damasak,Damasak,829,126,352,52,339,245,432,206,437,252,205,284,126,154,80,222
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4990,23/07/2023,Borno,Dikwa,Dikwa,390,111,198,91,140,486,432,153,236,128,189,201,360,404,309,138
4991,23/07/2023,Borno,Bama,Banki,399,949,199,334,98,340,103,206,362,232,71,468,335,64,239,138
4992,23/07/2023,Borno,Yola North,Yola North,648,590,105,52,397,136,174,273,279,306,176,277,106,343,287,470
4993,30/07/2023,Borno,Dikwa,Yola,326,126,378,440,365,128,401,308,129,127,205,354,311,326,449,128


In [27]:
# Export to CSV
augmented_data2.to_csv('simulated_WASH_data.csv', index=False)

NFI SHELTER DATA SIMULATION

In [28]:
import pandas as pd
import numpy as np
from sklearn.utils import resample

# Simulate IOM data
data3 = {
    'Date (week ending)': ['30/07/2023'] * 11,
    'State': ['Borno'] * 11,
    'LGA': ['Bama', 'Dikwa', 'Fufore', 'Gwoza', 'Jere', 'Maiduguri', 'Monguno', 'Ngala', 'Nganzai', 'Gwoza', 'Borno'],
    'NFI kits distributed to households': np.random.randint(300, 800, size=11),
    'NFI kits distributed to individuals': np.random.randint(1, 2, size=11),
    'Shelter repair kits distributed to households': np.random.randint(300, 800, size=11),
    'Shelter repair kits distributed to individuals': np.random.randint(30, 100, size=11),
    'Mudbrick shelters constructed for households': np.random.randint(300, 800, size=11),
    'Mudbrick shelters constructed for individuals': np.random.randint(30, 100, size=11),
    'Emergency shelters constructed for households': np.random.randint(300, 800, size=11),
    'Emergency shelters constructed for individuals': np.random.randint(30, 100, size=11),
    'Mudbrick shelters reinforced for households': np.random.randint(1, 5, size=11),
    'Mudbrick shelters reinforced for individuals': np.random.randint(300, 800, size=11),
    'Emergency shelters reinforced for households': np.random.randint(1, 5, size=11),
    'Emergency shelters reinforced for individuals': np.random.randint(300, 800, size=11),
}

# Create a DataFrame
original_data3 = pd.DataFrame(data3)

# Data augmentation using bootstrapping
augmented_data3 = pd.concat([resample(original_data3) for _ in range(5000 // len(original_data3))])

# Optionally, you can reset the index of the augmented DataFrame
augmented_data3.reset_index(drop=True, inplace=True)

# Display the simulated data
print(augmented_data3)


     Date (week ending)  State        LGA  NFI kits distributed to households  \
0            30/07/2023  Borno  Maiduguri                                 364   
1            30/07/2023  Borno      Gwoza                                 339   
2            30/07/2023  Borno       Bama                                 592   
3            30/07/2023  Borno      Dikwa                                 406   
4            30/07/2023  Borno    Monguno                                 611   
...                 ...    ...        ...                                 ...   
4989         30/07/2023  Borno  Maiduguri                                 364   
4990         30/07/2023  Borno     Fufore                                 407   
4991         30/07/2023  Borno      Gwoza                                 339   
4992         30/07/2023  Borno      Ngala                                 314   
4993         30/07/2023  Borno    Nganzai                                 676   

      NFI kits distributed 

In [29]:
augmented_data3

Unnamed: 0,Date (week ending),State,LGA,NFI kits distributed to households,NFI kits distributed to individuals,Shelter repair kits distributed to households,Shelter repair kits distributed to individuals,Mudbrick shelters constructed for households,Mudbrick shelters constructed for individuals,Emergency shelters constructed for households,Emergency shelters constructed for individuals,Mudbrick shelters reinforced for households,Mudbrick shelters reinforced for individuals,Emergency shelters reinforced for households,Emergency shelters reinforced for individuals
0,30/07/2023,Borno,Maiduguri,364,1,700,32,424,63,687,76,3,752,2,427
1,30/07/2023,Borno,Gwoza,339,1,441,40,434,91,734,34,3,685,2,771
2,30/07/2023,Borno,Bama,592,1,358,65,622,77,730,52,3,692,2,610
3,30/07/2023,Borno,Dikwa,406,1,504,62,606,73,314,57,2,713,4,490
4,30/07/2023,Borno,Monguno,611,1,464,86,620,62,627,77,2,587,3,651
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4989,30/07/2023,Borno,Maiduguri,364,1,700,32,424,63,687,76,3,752,2,427
4990,30/07/2023,Borno,Fufore,407,1,775,99,354,45,750,59,4,309,2,712
4991,30/07/2023,Borno,Gwoza,339,1,441,40,434,91,734,34,3,685,2,771
4992,30/07/2023,Borno,Ngala,314,1,637,43,649,33,342,32,4,612,1,621


In [30]:
# Export to CSV
augmented_data3.to_csv('simulated_NFISHELTER_data.csv', index=False)

DTM DATA SIMULATION

In [38]:
import pandas as pd
import numpy as np
from sklearn.utils import resample

# Simulate IOM data
data_iom = {
    'Date (week ending)': [],
    'State': [],
    'LGA': [],
    'Location': [],
    'Activity type': [],
    'Movements Observed': [],
    'ETT Report Produced': [],
    'Households Registered': [],
    'Households Verified': [],
    'Individuals': [],
    'Men': [],
    'Women': [],
    'Boys': [],
    'Girls': [],
    'Flow monitoring points': [],
    'Inflow': [],
    'Outflow': [],
    'MUAC': []
}

# Define the number of rows
num_rows = 34

# Generate data
for _ in range(num_rows):
    data_iom['Date (week ending)'].append('07/07/2023')
    data_iom['State'].append(np.random.choice(['Kano', 'Sokoto', 'Borno', 'Bauchi', 'Gombe', 'Adamawa', 'Taraba']))
    data_iom['LGA'].append(np.random.choice(['Albasu', 'Bebeji', 'Gwazo', 'Karaye', 'Kiru', 'Ajingi', 'Bichi', 'Danbatta', 'Gezawe', 'Gwale',
                                             'Tarauni', 'Tsanyawa', 'Kura', 'Dawakin Tofa', 'Kumbotso', 'Nasarawa', 'Binji', 'Bodinga', 'Shagari',
                                             'Ilela', 'Tambuwal', 'Maiduguri', 'Bama', 'Damboa', 'Dikwa', 'Mongonu', 'Ngala', 'Nganzai', 'Jere',
                                             'Kala-Balge', 'Alkaleri', 'Bogoro', 'Darazo', 'Giade', 'Katagum', 'Akko', 'Balanga', 'Billiri', 'Dukku',
                                             'Kaltungo', 'Yola', 'Demsa', 'Fufore', 'Girei', 'Gombi', 'Madagali', 'Ardo Kola', 'Donga', 'Gashaka', 'Wukari']))
    data_iom['Location'].append('Nil')
    data_iom['Activity type'].append(np.random.choice(['Flow monitoring', 'Displacement']))
    data_iom['Movements Observed'].append(np.random.randint(16, 98))
    data_iom['ETT Report Produced'].append(1)
    data_iom['Households Registered'].append(np.random.randint(234, 798))
    data_iom['Households Verified'].append(np.random.randint(23, 89))
    data_iom['Individuals'].append(np.random.randint(25, 975))
    data_iom['Men'].append(np.random.randint(36, 963))
    data_iom['Women'].append(np.random.randint(346, 975))
    data_iom['Boys'].append(np.random.randint(457, 963))
    data_iom['Girls'].append(np.random.randint(443, 907))
    data_iom['Flow monitoring points'].append(np.random.randint(1, 5))
    data_iom['Inflow'].append(np.random.randint(234, 980))
    data_iom['Outflow'].append(np.random.randint(456, 3456))
    data_iom['MUAC'].append(0)

# Create a DataFrame
original_data_iom = pd.DataFrame(data_iom)

# Data augmentation using bootstrapping
augmented_data_iom = pd.concat([resample(original_data_iom) for _ in range(5000 // num_rows)])

# Optionally, you can reset the index of the augmented DataFrame
augmented_data_iom.reset_index(drop=True, inplace=True)

# Display the simulated data
print(augmented_data_iom)


     Date (week ending)    State       LGA Location    Activity type  \
0            07/07/2023     Kano      Bama      Nil     Displacement   
1            07/07/2023   Bauchi  Danbatta      Nil     Displacement   
2            07/07/2023    Gombe  Madagali      Nil  Flow monitoring   
3            07/07/2023     Kano     Giade      Nil  Flow monitoring   
4            07/07/2023    Borno    Bogoro      Nil     Displacement   
...                 ...      ...       ...      ...              ...   
4993         07/07/2023   Sokoto   Nganzai      Nil     Displacement   
4994         07/07/2023   Sokoto   Nganzai      Nil     Displacement   
4995         07/07/2023   Bauchi   Tarauni      Nil     Displacement   
4996         07/07/2023  Adamawa  Tambuwal      Nil     Displacement   
4997         07/07/2023   Bauchi  Danbatta      Nil     Displacement   

      Movements Observed  ETT Report Produced  Households Registered  \
0                     72                    1                  

In [39]:
augmented_data_iom

Unnamed: 0,Date (week ending),State,LGA,Location,Activity type,Movements Observed,ETT Report Produced,Households Registered,Households Verified,Individuals,Men,Women,Boys,Girls,Flow monitoring points,Inflow,Outflow,MUAC
0,07/07/2023,Kano,Bama,Nil,Displacement,72,1,416,88,851,940,570,769,526,3,365,866,0
1,07/07/2023,Bauchi,Danbatta,Nil,Displacement,56,1,671,75,49,872,791,557,790,1,340,2405,0
2,07/07/2023,Gombe,Madagali,Nil,Flow monitoring,45,1,733,85,640,196,962,557,753,4,367,2037,0
3,07/07/2023,Kano,Giade,Nil,Flow monitoring,36,1,470,68,95,650,566,582,558,3,338,3396,0
4,07/07/2023,Borno,Bogoro,Nil,Displacement,33,1,288,47,553,595,719,646,685,3,664,3083,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4993,07/07/2023,Sokoto,Nganzai,Nil,Displacement,81,1,393,31,968,868,960,924,824,3,629,2285,0
4994,07/07/2023,Sokoto,Nganzai,Nil,Displacement,81,1,393,31,968,868,960,924,824,3,629,2285,0
4995,07/07/2023,Bauchi,Tarauni,Nil,Displacement,62,1,661,37,711,336,822,962,659,1,779,2129,0
4996,07/07/2023,Adamawa,Tambuwal,Nil,Displacement,75,1,582,50,91,731,436,796,887,3,933,3048,0


In [40]:
# Export to CSV
augmented_data_iom.to_csv('simulated_DTM_data.csv', index=False)

MHPSS DATA SIMULATION

In [46]:
import pandas as pd
import numpy as np
from sklearn.utils import resample

# Define the data structure
data_iom5 = {
    'Date (week ending)': ['07/07/2023'] * 70 + ['23/07/2023'] * 70,
    'State': ['Borno'] * 70 + ['Adamawa'] * 70,
    'LGA': np.random.choice(['Maiduguri', 'Bama', 'Damboa', 'Dikwa', 'Mongonu', 'Ngala', 'Nganzai', 'Jere', 'Kala-Balge', 'Yola', 'Demsa', 'Fufore', 'Girei', 'Gombi', 'Madagali', 'Ikara', 'Kudan', 'Lere', 'Makarfi', 'Sanga', 'Busari', 'Bade', 'Damaturu', 'Fika', 'Fune', 'Geidam', 'Bassa', 'Kanam', 'Barkin Ladi', 'Wase', 'Jos', 'Lafiya', 'Akwanga', 'Karu', 'Wamba', 'Keffi'], size=140),
    'individuals reached through various MHPSS/Protection services and activities': np.random.randint(300, 800, size=140),
    'individuals reached through GBV awareness sessions and other related activities': np.random.randint(30, 90, size=140),
    'individuals reached through counter-trafficking related activities': np.random.randint(300, 800, size=140),
    'flow monitoring points': np.random.randint(100, 300, size=140),
    'individuals trained': np.random.randint(500, 1000, size=140),
    'Dignity kits distributed': np.random.randint(500, 1200, size=140),
}

# Create a DataFrame
original_data_iom5 = pd.DataFrame(data_iom5)

# Data augmentation using bootstrapping
augmented_data_iom5 = pd.concat([resample(original_data_iom5) for _ in range(5000 // len(original_data_iom5))])


In [47]:
augmented_data_iom5

Unnamed: 0,Date (week ending),State,LGA,individuals reached through various MHPSS/Protection services and activities,individuals reached through GBV awareness sessions and other related activities,individuals reached through counter-trafficking related activities,flow monitoring points,individuals trained,Dignity kits distributed
103,23/07/2023,Adamawa,Bade,723,67,600,272,853,534
56,07/07/2023,Borno,Wamba,547,30,640,134,895,570
10,07/07/2023,Borno,Bassa,466,79,636,219,879,1092
27,07/07/2023,Borno,Wase,505,70,707,233,793,961
34,07/07/2023,Borno,Keffi,407,51,333,207,565,1030
...,...,...,...,...,...,...,...,...,...
69,07/07/2023,Borno,Girei,312,66,398,259,937,843
36,07/07/2023,Borno,Wamba,559,32,396,273,918,828
23,07/07/2023,Borno,Jos,549,32,796,265,712,964
57,07/07/2023,Borno,Madagali,761,51,729,265,554,631


In [48]:
augmented_data_iom5.shape

(4900, 9)

In [49]:
# Export to CSV
augmented_data_iom5.to_csv('simulated_MHPSS_data.csv', index=False)

ERL DATA SIM

In [50]:
import pandas as pd
import numpy as np

# Define the data structure
data_iom_skills = {
    'Date (week ending)': ['07/07/2023', '08/07/2023', '09/07/2023', '10/07/2023', '11/07/2023', '12/07/2023', '13/07/2023', '14/07/2023', '15/07/2023'],
    'State': ['Borno'] * 9,
    'LGA': ['Maiduguri', 'Bama', 'Damboa', 'Dikwa', 'Mongonu', 'Ngala', 'Nganzai', 'Jere', 'Kala-Balge'],
    'individuals benefitted from skills acquisition training': np.random.randint(200, 800, size=9),
    'individuals received briquettes and fuel-efficient stoves': np.random.randint(300, 1000, size=9),
    'individuals received NFI': np.random.randint(200, 800, size=9),
}

# Create a DataFrame
original_data_iom_skills = pd.DataFrame(data_iom_skills)

# Data augmentation using bootstrapping
augmented_data_iom_skills = pd.concat([resample(original_data_iom_skills) for _ in range(5000 // len(original_data_iom_skills))])


In [51]:
augmented_data_iom_skills

Unnamed: 0,Date (week ending),State,LGA,individuals benefitted from skills acquisition training,individuals received briquettes and fuel-efficient stoves,individuals received NFI
7,14/07/2023,Borno,Jere,687,364,486
3,10/07/2023,Borno,Dikwa,746,984,465
1,08/07/2023,Borno,Bama,716,541,643
8,15/07/2023,Borno,Kala-Balge,495,881,371
3,10/07/2023,Borno,Dikwa,746,984,465
...,...,...,...,...,...,...
6,13/07/2023,Borno,Nganzai,492,723,485
2,09/07/2023,Borno,Damboa,531,824,386
0,07/07/2023,Borno,Maiduguri,629,993,324
8,15/07/2023,Borno,Kala-Balge,495,881,371


In [52]:
# Export to CSV
augmented_data_iom_skills.to_csv('simulated_ERL_data.csv', index=False)

HH SIMULATION

In [53]:
import pandas as pd
import numpy as np

# Define the data structure
data_iom_ets = {
    'Date (week ending)': ['07/07/2023', '08/07/2023', '09/07/2023', '10/07/2023', '11/07/2023', '12/07/2023', '13/07/2023', '14/07/2023', '15/07/2023'],
    'State': ['Borno'] * 9,
    'LGA': ['Maiduguri', 'Bama', 'Damboa', 'Dikwa', 'Mongonu', 'Ngala', 'Nganzai', 'Jere', 'Kala-Balge'],
    'bed nights recorded': np.random.randint(200, 800, size=9),
    'bed nights recorded from the beginning of the year': np.random.randint(200, 900, size=9),
    'average daily occupancy recorded': np.random.randint(200, 800, size=9),
    'average daily occupancy recorded from the beginning of the year': np.random.randint(200, 900, size=9),
    'organizations provided with office spaces': np.random.randint(200, 800, size=9),
    'office spaces provided from the beginning of the year': np.random.randint(200, 900, size=9),
    'organizations provided with meeting spaces': np.random.randint(200, 800, size=9),
    'meeting spaces provided from the beginning of the year': np.random.randint(200, 900, size=9),
    'individuals from organizations provided with emergency telecommunication sector (ETS) service': np.random.randint(200, 800, size=9),
    'individuals from organizations provided with ETS service from the beginning of the year': np.random.randint(200, 900, size=9),
    'organizations served': np.random.randint(200, 800, size=9),
    'organizations served from the beginning of the year': np.random.randint(200, 900, size=9),
}

# Create a DataFrame
original_data_iom_ets = pd.DataFrame(data_iom_ets)

# Data augmentation using bootstrapping
augmented_data_iom_ets = pd.concat([resample(original_data_iom_ets) for _ in range(5000 // len(original_data_iom_ets))])


In [54]:
augmented_data_iom_ets

Unnamed: 0,Date (week ending),State,LGA,bed nights recorded,bed nights recorded from the beginning of the year,average daily occupancy recorded,average daily occupancy recorded from the beginning of the year,organizations provided with office spaces,office spaces provided from the beginning of the year,organizations provided with meeting spaces,meeting spaces provided from the beginning of the year,individuals from organizations provided with emergency telecommunication sector (ETS) service,individuals from organizations provided with ETS service from the beginning of the year,organizations served,organizations served from the beginning of the year
3,10/07/2023,Borno,Dikwa,267,584,460,790,235,742,206,750,444,891,629,442
4,11/07/2023,Borno,Mongonu,544,602,568,645,357,702,783,807,652,723,679,210
0,07/07/2023,Borno,Maiduguri,686,477,458,821,468,522,498,345,282,741,592,589
6,13/07/2023,Borno,Nganzai,625,441,643,509,444,665,369,522,749,350,691,278
3,10/07/2023,Borno,Dikwa,267,584,460,790,235,742,206,750,444,891,629,442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,08/07/2023,Borno,Bama,392,721,342,828,304,398,632,834,285,423,755,369
7,14/07/2023,Borno,Jere,435,678,353,347,661,288,778,242,359,682,530,822
0,07/07/2023,Borno,Maiduguri,686,477,458,821,468,522,498,345,282,741,592,589
0,07/07/2023,Borno,Maiduguri,686,477,458,821,468,522,498,345,282,741,592,589


In [55]:
# Export to CSV
augmented_data_iom_ets.to_csv('simulated_HH_data.csv', index=False)