In [1]:
import neo4j

import pandas as pd

from IPython.display import display

In [2]:
# Connect to the Neo4j server and open the session that the helper functions
# below use through the module-level name `session`.
# NOTE(review): the credentials are hard-coded here; consider reading them from
# environment variables before sharing this notebook.
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","ucb_mids_w205"))
session = driver.session()

# Functions

In [3]:
def my_neo4j_wipe_out_database():
    """Wipe out the database by deleting all nodes and relationships.

    Uses the module-level ``session``. Each node is removed together with the
    relationships attached to it by a single ``DETACH DELETE`` statement, so no
    separate relationship-deleting pass is needed.

    Returns:
        None
    """

    query = "match (node) detach delete node"

    # Consume the result so the delete has fully run before control returns
    # to the caller.
    session.run(query).consume()

In [4]:
def my_neo4j_run_query_pandas(query, **kwargs):
    """Execute a query on the module-level session and return a pandas DataFrame.

    Args:
        query: query text handed to ``session.run``.
        **kwargs: forwarded unchanged to ``session.run``.

    Returns:
        A DataFrame with one row per record of the result and the result's
        keys as column names.
    """

    cursor = session.run(query, **kwargs)

    # Collect the values of every record first, then read the column names.
    rows = []
    for record in cursor:
        rows.append(record.values())

    column_names = cursor.keys()

    return pd.DataFrame(rows, columns=column_names)

In [5]:
def my_neo4j_nodes_relationships():
    """Print all the nodes and relationships, followed by the graph density.

    Runs two queries through ``my_neo4j_run_query_pandas``: one listing every
    node (name and labels) and one listing every relationship (both end nodes
    and the relationship type). Each result is shown with ``display``.

    The density is computed as ``2 * relationships / (nodes * (nodes - 1))``.
    With fewer than two nodes that denominator is zero, so the density is
    reported as 0.0 instead of raising ``ZeroDivisionError``.

    Returns:
        None
    """

    print("-------------------------")
    print("  Nodes:")
    print("-------------------------")

    query = """
        match (n) 
        return n.name as node_name, labels(n) as labels
        order by n.name
    """

    nodes_df = my_neo4j_run_query_pandas(query)

    number_nodes = nodes_df.shape[0]

    display(nodes_df)

    print("-------------------------")
    print("  Relationships:")
    print("-------------------------")

    query = """
        match (n1)-[r]->(n2) 
        return n1.name as node_name_1, labels(n1) as node_1_labels, 
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """

    relationships_df = my_neo4j_run_query_pandas(query)

    number_relationships = relationships_df.shape[0]

    display(relationships_df)

    # An empty or single-node graph has no possible node pairs; guard the
    # division so the summary still prints (e.g. right after a wipe-out).
    possible_pairs = number_nodes * (number_nodes - 1)
    if possible_pairs:
        density = (2 * number_relationships) / possible_pairs
    else:
        density = 0.0

    print("-------------------------")
    print("  Density:", f'{density:.1f}')
    print("-------------------------")
    

# EDA

<b> Refugees: </b>Refugees include individuals recognized under the 1951 Convention relating to the Status of Refugees, its 1967 Protocol, the 1969 Organization of African Unity (OAU) Convention Governing the Specific Aspects of Refugee Problems in Africa, the refugee definition contained in the 1984 Cartagena Declaration on Refugees as incorporated into national laws, those recognized in accordance with the UNHCR Statute, individuals granted complementary forms of protection, and those enjoying temporary protection. The refugee population also includes people in refugee-like situations.

<i>People in a refugee-like situation refers to a category which is descriptive in nature and includes groups of people who are outside their country or territory of origin and who face protection risks similar to those of refugees, but <u>for whom refugee status has, for practical or other reasons, not been ascertained.</u></i>

<b> Asylum Seekers: </b>Asylum-seekers are individuals who have sought international protection and whose claims for refugee status have not yet been determined.

<b>Internally displaced persons (IDPs)</b> are persons or groups of persons who have been forced or obliged to flee or to leave their homes or places of habitual residence, in particular as a result of, or in order to avoid the effects of, armed conflict, situations of generalized violence, violations of human rights or natural or human-made disasters, and who have not crossed an internationally recognized State border. For the purposes of UNHCR’s statistics, this population includes only conflict-generated IDPs to whom the Office extends protection and/or assistance. The IDP population also includes people in an IDP-like situation.

<i>People in an IDP-like situation refers to a category which is descriptive in nature and includes groups of people who are inside their country of nationality or habitual residence and who face protection risks similar to those of IDPs but <u>who, for practical or other reasons, could not be reported as such.</u></i>

In [17]:
# Source data: population.csv from the TidyTuesday 2023-08-22 folder on GitHub.
# The file is fetched over the network each time this cell runs.
url = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-08-22/population.csv'
pop = pd.read_csv(url)

In [21]:
# Restrict to the years 2010 through 2019 (lower bound inclusive, upper bound exclusive).
selectpop = pop.loc[pop['year'].ge(2010) & pop['year'].lt(2020)]

In [36]:
# Rows for which origin == asylum. These seem to be largely IDPs.
# Not sure about cases like Sweden in 2012, where asylum_seekers=21 and all other types are 0.
# A fixed random_state makes the sampled rows the same on every re-run.
selectpop[selectpop['coo'] == selectpop['coa']].sample(10, random_state=42)

Unnamed: 0,year,coo_name,coo,coo_iso,coa_name,coa,coa_iso,refugees,asylum_seekers,returned_refugees,idps,returned_idps,stateless,ooc,oip,hst
39852,2018,Jordan,JOR,JOR,Jordan,JOR,JOR,0,0,0,0,0,0,1355,,
30660,2016,Philippines,PHI,PHL,Philippines,PHI,PHL,0,0,0,87418,255626,0,0,,
23137,2015,Cameroon,CMR,CMR,Cameroon,CMR,CMR,0,0,0,92657,18636,0,0,,
39346,2018,Croatia,HRV,HRV,Croatia,HRV,HRV,0,0,0,0,0,0,4376,,
36057,2017,Serbia and Kosovo: S/RES/1244 (1999),SRB,SRB,Serbia and Kosovo: S/RES/1244 (1999),SRB,SRB,0,0,0,217453,193,0,0,,
11818,2012,Rwanda,RWA,RWA,Rwanda,RWA,RWA,0,0,0,0,0,0,89,,
23197,2015,Colombia,COL,COL,Colombia,COL,COL,0,0,0,6939067,0,0,0,,
18349,2014,Congo,COB,COG,Congo,COB,COG,0,0,0,0,0,0,101,,
28140,2016,Colombia,COL,COL,Colombia,COL,COL,0,0,0,7410816,0,0,0,,
12155,2012,Sweden,SWE,SWE,Sweden,SWE,SWE,0,21,0,0,0,0,0,,


In [37]:
# Keep only rows whose country of origin differs from the country of asylum.
selectpop = selectpop.loc[selectpop['coo'].ne(selectpop['coa'])]

In [38]:
# Summary statistics for selectpop after the origin == asylum rows were removed above.
selectpop.describe()

Unnamed: 0,year,refugees,asylum_seekers,returned_refugees,idps,returned_idps,stateless,ooc,oip,hst
count,47430,47430,47430,47430,47430,47430,47430,47430,36,0.0
mean,2015,3195,471,79,0,0,761,88,171532,
std,3,50015,6446,2655,0,0,19686,2727,353168,
min,2010,0,0,0,0,0,0,0,11,
25%,2012,5,0,0,0,0,0,0,13673,
50%,2015,13,8,0,0,0,0,0,32830,
75%,2017,94,52,0,0,0,0,0,138700,
max,2019,3622366,940668,381275,0,0,955399,162842,1771237,


In [43]:
# Totals per country of origin. numeric_only=True keeps the text columns
# (country names, ISO codes) out of the sum; recent pandas versions no longer
# drop them automatically. The summed 'year' column is not meaningful.
selectpop.groupby('coo').sum(numeric_only=True)

Unnamed: 0_level_0,year,refugees,asylum_seekers,returned_refugees,idps,returned_idps,stateless,ooc,oip,hst
coo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AFG,1698425,26658825,1822369,875761,0,0,0,7584,0,0
ALB,545921,123323,156888,0,0,0,0,2445,0,0
ALG,952896,45458,52154,17,0,0,0,166,0,0
AND,18123,31,15,0,0,0,0,0,0,0
ANG,910515,347457,41314,44765,0,0,0,366294,0,0
...,...,...,...,...,...,...,...,...,...,...
WES,20152,0,181,0,0,0,0,0,0,0
WSH,233723,1165904,9769,6,0,0,0,5,0,0
YEM,1138822,137559,128690,10,0,0,0,89,0,0
ZAM,243800,2609,2653,0,0,0,0,10,0,0


In [60]:
# Example of a two-way flow: refugees from A went to B, and refugees from B went to A.
(
    selectpop
    .set_index(['coo', 'coa'])
    .loc[[('AFG', 'IRN'), ('IRN', 'AFG')]]
    .query('year == 2010')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,year,coo_name,coo_iso,coa_name,coa_iso,refugees,asylum_seekers,returned_refugees,idps,returned_idps,stateless,ooc,oip,hst
coo,coa,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AFG,IRN,2010,Afghanistan,AFG,Iran (Islamic Rep. of),IRN,1027577,1639,8487,0,0,0,0,,
IRN,AFG,2010,Iran (Islamic Rep. of),IRN,Afghanistan,AFG,30,21,0,0,0,0,0,,
