# Finding home area and calculating visit matrix

In [1]:
import pandas as pd
import numpy as np
import nafot
import os
from tqdm import tqdm_notebook
import multiprocessing as mp
import time
import datetime
import pickle
%matplotlib inline

Get the list of sample files

In [2]:
# List the entries of the "with_stat_area" folder; each one is later read
# as a sample CSV by the processing loops.
with_stat_area_dir = '../../data/new_samples/with_stat_area'
sample_files_names = os.listdir(with_stat_area_dir)

### Calculate only visit matrix - all

In [3]:
# Home statistical area (with age data) for every user. Values equal to the
# sentinel string 'NotDetermined' are kept as-is; every other value is cast
# to float.
# NOTE(review): this path is spelled '../../Data' while the sample paths below
# use '../../data' - confirm both resolve on a case-sensitive filesystem.
home_data = pd.read_csv('../../Data/new_samples/home_area/home_area_data_all_with_age.csv')
home_data['home_stat_area'] = home_data['home_stat_area'].apply(
    lambda area: area if area == 'NotDetermined' else float(area)
)

for file_name in tqdm_notebook(sample_files_names):
    # Read one sample and drop every row that has a missing value
    # (i.e. records without a stat area)
    loc_data = pd.read_csv(f'../../data/new_samples/with_stat_area/{file_name}').dropna()

    # Keep the home rows of the users that appear in this sample only
    sample_imsi = set(loc_data.imsi)
    cur_home_data = home_data[home_data.imsi.isin(sample_imsi)]

    # Raw visit matrix for this sample, using the age-aware variant.
    # (nafot.calculate_visit_matrix is the variant used elsewhere in this
    # notebook when no age split is wanted.)
    visit_matrix_raw = nafot.calculate_visit_matrix_with_age(loc_data, cur_home_data)

    # Store the matrix; file_name[:-4] drops the last four characters of the
    # sample file name (presumably the '.csv' extension)
    np.save(f'../../data/new_samples/matrix_raw/with_age_group/{file_name[:-4]}_matrix_raw.npy', visit_matrix_raw)

A Jupyter Widget




Merge visit matrices - all

In [5]:
# Directory holding the per-sample raw visit matrices
matrix_dir = '../../data/new_samples/matrix_raw/with_age_group'

# File names of the per-sample visit matrices.
# Only files ending in '_matrix_raw.npy' are summed - that is the suffix the
# per-sample cell uses when saving. The merged matrix saved below (and the
# adjusted matrix) are written into this same directory, so summing every
# directory entry would fold earlier aggregates back into the total whenever
# this cell is re-run.
matrix_files_names = [
    name for name in os.listdir(matrix_dir)
    if name.endswith('_matrix_raw.npy')
]

# Accumulator with 3070*2 rows and 3070 columns
# (assumed to be the shape of every per-sample matrix - TODO confirm)
matrix_raw_all = np.zeros((3070*2, 3070))

# Go over the per-sample matrices and sum them element-wise
for file_name in matrix_files_names:
    cur_mat = np.load(f'{matrix_dir}/{file_name}')
    matrix_raw_all += cur_mat

# Save the merged matrix
np.save(f'{matrix_dir}/visit_matrix_raw_all_with_age_group.npy', matrix_raw_all)

Adjust total matrix

In [16]:
# Pass the merged raw matrix through nafot.adjust_visit_matrix_with_age and
# store the result alongside the raw matrices.
adjusted_matrix_path = '../../data/new_samples/matrix_raw/with_age_group/visit_matrix_raw_adjusted_with_age_group.npy'
matrix_raw_all_adjusted = nafot.adjust_visit_matrix_with_age(matrix_raw_all)
np.save(adjusted_matrix_path, matrix_raw_all_adjusted)

# OLD

In [5]:
def get_home_data_and_visit_matrix(loc_data):
    """Compute the home-area table and the raw visit matrix for one sample.

    Args:
        loc_data: location records of a sample, passed unchanged to
            ``nafot.get_home_stat_area`` and ``nafot.calculate_visit_matrix``.

    Returns:
        A ``(home_data, visit_matrix_raw)`` tuple: the result of
        ``nafot.get_home_stat_area`` and the matrix built from it by
        ``nafot.calculate_visit_matrix``.
    """
    # NOTE(review): the meaning of the positional ``False`` flag is defined
    # in nafot - confirm against that module.
    home_data = nafot.get_home_stat_area(loc_data, False)
    return home_data, nafot.calculate_visit_matrix(loc_data, home_data)

In [2]:
# Load the home/age table; 'NotDetermined' entries stay as strings, every
# other home_stat_area value is cast to float.
home_data = pd.read_csv('../../Data/new_samples/home_area/home_area_data_all_with_age.csv')
home_data['home_stat_area'] = home_data['home_stat_area'].apply(
    lambda area: area if area == 'NotDetermined' else float(area)
)

In [3]:
# Read a single sample file and drop every row containing a missing value
loc_da = pd.read_csv('../../Data/new_samples/with_stat_area/sample_00with_stat.csv').dropna()

In [6]:
# Home rows of the users (imsi) that appear in the loaded sample
cur_home_data = home_data[home_data.imsi.isin(set(loc_da.imsi))]

In [4]:
# Draw a 1000-row subsample for a quick end-to-end check of the matrix code.
# The fixed random_state makes the draw - and every number derived from it in
# the cells below - repeatable across kernel restarts.
sm = loc_da.sample(1000, random_state=42).copy()

# Home rows of the users (imsi) that appear in the subsample
cur_home_data_sm = home_data[home_data.imsi.isin(set(sm.imsi))]

In [5]:
# Raw visit matrix (age-aware variant) computed on the subsample
aaa = nafot.calculate_visit_matrix_with_age(sm, cur_home_data_sm)

In [6]:
# Inspect the dimensions of the raw matrix
aaa.shape

(6140, 3070)

In [7]:
# Apply nafot.adjust_visit_matrix_with_age to the subsample matrix
bbb = nafot.adjust_visit_matrix_with_age(aaa)

In [10]:
# Total over all entries of the adjusted matrix (sanity check)
bbb.sum()

774.0

### Calculate only visit matrix - children and adults

In [None]:
# Home statistical area (with age data) for every user. 'NotDetermined'
# entries are kept as strings; every other value is cast to float.
home_data = pd.read_csv('../../Data/new_samples/home_area/home_area_data_all_with_age.csv')
home_data['home_stat_area'] = home_data['home_stat_area'].apply(
    lambda area: area if area == 'NotDetermined' else float(area)
)

# Collection of imsi values treated as children.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('../../data/new_samples/children_imsi/children_imsi.pickle', 'rb') as pickle_in:
    children_imsi = pickle.load(pickle_in)

for file_name in tqdm_notebook(sample_files_names):
    # Read one sample and drop every row that has a missing value
    # (i.e. records without a stat area)
    loc_data = pd.read_csv(f'../../data/new_samples/with_stat_area/{file_name}').dropna()

    # Split the records by age group: rows whose imsi is in children_imsi
    # versus all remaining rows
    is_child = loc_data.imsi.isin(children_imsi)
    loc_data_children = loc_data[is_child].copy()
    loc_data_adults = loc_data[~is_child].copy()

    # Home rows restricted to the users of each group
    home_data_children = home_data[home_data.imsi.isin(set(loc_data_children.imsi))]
    home_data_adult = home_data[home_data.imsi.isin(set(loc_data_adults.imsi))]

    # One raw visit matrix per age group
    visit_matrix_raw_children = nafot.calculate_visit_matrix(loc_data_children, home_data_children)
    visit_matrix_raw_adult = nafot.calculate_visit_matrix(loc_data_adults, home_data_adult)

    # Store both matrices; file_name[:-4] drops the last four characters of
    # the sample file name (presumably the '.csv' extension).
    # The 'childern' spelling in the output name is kept as-is so the names
    # of files already on disk do not change.
    np.save(f'../../data/new_samples/matrix_raw/children/{file_name[:-4]}_matrix_raw_childern.npy', visit_matrix_raw_children)
    np.save(f'../../data/new_samples/matrix_raw/adult/{file_name[:-4]}_matrix_raw_adult.npy', visit_matrix_raw_adult)

A Jupyter Widget

Merge visit matrices - children and adults

In [5]:
# File names of the per-sample visit matrices, one list per age group
children_files_names = os.listdir('../../data/new_samples/matrix_raw/children')
adult_files_names = os.listdir('../../data/new_samples/matrix_raw/adult')

# Accumulators for the element-wise sums (3070 x 3070 each)
children_matrix_raw = np.zeros((3070,3070))
adult_matrix_raw = np.zeros((3070,3070))

# Sum the children matrices
for children_file_name in children_files_names:
    cur_mat_children = np.load(f'../../data/new_samples/matrix_raw/children/{children_file_name}')
    children_matrix_raw += cur_mat_children

# Sum the adult matrices. This must iterate the adult listing: the children
# file names carry a different suffix and do not exist in the adult folder.
for adult_file_name in adult_files_names:
    cur_mat_adult = np.load(f'../../data/new_samples/matrix_raw/adult/{adult_file_name}')
    adult_matrix_raw += cur_mat_adult

# Save merged matrices (left disabled, as in the original cell).
# NOTE(review): these targets live inside the directories listed above, so
# once enabled a re-run of this cell would also sum the merged files.
# np.save(f'../../data/new_samples/matrix_raw/children/visit_matrix_raw_children.npy', children_matrix_raw)
# np.save(f'../../data/new_samples/matrix_raw/adult/visit_matrix_raw_adult.npy', adult_matrix_raw)

### Update home area and visit matrix

In [3]:
# Recompute the home-area table and the raw visit matrix for every sample.
# Relies on get_home_data_and_visit_matrix being defined in an earlier cell.
for file_name in tqdm_notebook(sample_files_names):
    # Read one sample and drop every row that has a missing value
    # (i.e. records without a stat area)
    loc_data = pd.read_csv(f'../data/samples/new samples/with stat area/{file_name}').dropna()

    # Home area data and raw visits matrix for this sample
    home_data, visit_matrix_raw = get_home_data_and_visit_matrix(loc_data)

    # Write both results; file_name[:-4] drops the last four characters of
    # the sample file name (presumably the '.csv' extension)
    home_data.to_csv(f'../data/samples/new samples/home area/{file_name[:-4]} home_area.csv', index=False)
    np.save(f'../data/samples/new samples/matrix raw/{file_name[:-4]} matrix_raw.npy', visit_matrix_raw)

A Jupyter Widget


