In [1]:
import pandas as pd
import networkx as nx

# Build a directed migration graph from the CSV: each row becomes one
# Source -> Target edge carrying one `weight_<year>` attribute per year column.

# File to read and the years it covers; the CSV column names ('year <YYYY>')
# and the edge attribute names ('weight_<YYYY>') are both derived from YEARS.
MIGRATION_CSV = 'migration- total.csv'
YEARS = (1990, 1995, 2000, 2005, 2010, 2015)

# Load the data from the CSV file
data = pd.read_csv(MIGRATION_CSV)

# Create a directed graph
G = nx.DiGraph()

# Selecting the needed columns up front raises KeyError immediately if the
# file lacks one of them, instead of failing part-way through the loop.
year_columns = [f'year {year}' for year in YEARS]
edge_rows = data[['Source', 'Target', *year_columns]].itertuples(index=False, name=None)

# add_edge creates any missing endpoint node itself, so no separate add_node
# calls are needed.
# NOTE(review): a repeated (Source, Target) pair overwrites the attributes set
# by the earlier row -- confirm that pairs are unique in the CSV.
for source, target, *counts in edge_rows:
    G.add_edge(source, target,
               **{f'weight_{year}': count for year, count in zip(YEARS, counts)})

# Optional: Check the number of nodes and edges in the graph
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
print(f'Number of nodes: {num_nodes}, Number of edges: {num_edges}')


Number of nodes: 232, Number of edges: 11228


In [4]:
# Migration totals per year, summed over every edge of the graph

# Edge attribute names, spelled exactly as they were set on the graph's edges
year_keys = (
    'weight_1990',
    'weight_1995',
    'weight_2000',
    'weight_2005',
    'weight_2010',
    'weight_2015',
)

# For each year, add up that attribute across all edges; an edge that lacks
# the attribute contributes 0.
migration_trends_corrected = {
    key: sum(edge_attrs.get(key, 0) for _, _, edge_attrs in G.edges(data=True))
    for key in year_keys
}

migration_trends_corrected

{'weight_1990': 144183352,
 'weight_1995': 152646765,
 'weight_2000': 164748597,
 'weight_2005': 182416026,
 'weight_2010': 211672392,
 'weight_2015': 233916692}

In [5]:
# Identify Countries with the Highest Inbound or Outbound Migrations

# Edge attributes that hold the per-year migration counts. Summing these keys
# explicitly (rather than every attribute value on the edge) keeps the totals
# correct even if other attributes are later attached to the edges.
year_weight_keys = (
    'weight_1990',
    'weight_1995',
    'weight_2000',
    'weight_2005',
    'weight_2010',
    'weight_2015',
)

# Initializing dictionaries to store total outbound and inbound migrations for each country
outbound_migration = {node: 0 for node in G.nodes()}
inbound_migration = {node: 0 for node in G.nodes()}

# Single pass over the edges: each edge's all-years total is credited to its
# source node as outbound and to its target node as inbound.
for u, v, attributes in G.edges(data=True):
    total_migration = sum(attributes.get(key, 0) for key in year_weight_keys)
    outbound_migration[u] += total_migration
    inbound_migration[v] += total_migration

# Identifying the top countries with the highest outbound and inbound migrations
top_outbound_countries = sorted(outbound_migration.items(), key=lambda x: x[1], reverse=True)[:5]
top_inbound_countries = sorted(inbound_migration.items(), key=lambda x: x[1], reverse=True)[:5]

top_outbound_countries, top_inbound_countries



([('Russian Federation', 66732561),
  ('India', 60360138),
  ('Mexico', 55130355),
  ('China', 40220546),
  ('Bangladesh', 36022947)],
 [('United States of America', 198572669),
  ('Russian Federation', 69827802),
  ('Germany', 55129412),
  ('France', 39982477),
  ('Saudi Arabia', 39141532)])