In [42]:
import numpy as np
import pandas as pd
import folium
from googletrans import Translator

In [33]:
# Read the CSV export into a DataFrame.
df = pd.read_csv("data/problem 2 data.xlsx - Data.csv")

# Replace the file's own headers, by position, with short snake_case names.
# The groups below are split only for readability; their concatenated order
# must match the column order of the CSV exactly.
df.columns = (
    # record id, dates and location
    ['id', 'start_date', 'end_date', 'record_date', 'lat', 'lng', 'governorate', 'district']
    # business profile
    + ['registered', 'registration_type', 'business_name', 'economic_sector', 'sub_sector',
       'products_services', 'phone_number', 'business_phone', 'employee_count', 'enterprise_size']
    # vacancy and "current_*" columns
    + ['has_vacancies', 'num_vacancies', 'current_interns', 'current_seasonal',
       'current_entry_level', 'current_mid_senior', 'current_senior_mgmt',
       'current_cust_service', 'current_sales', 'current_priority_it',
       'current_priority_marketing', 'current_priority_admin', 'current_priority_finance',
       'current_priority_logistics', 'current_priority_technical', 'current_priority_others',
       'current_other_specify']
    + ['youth_employment']
    # "future_*" columns
    + ['future_needs', 'future_num_vacancies', 'future_interns', 'future_seasonal',
       'future_entry_level', 'future_mid_senior', 'future_senior_mgmt', 'future_total',
       'future_cust_service', 'future_sales', 'future_priority_it',
       'future_priority_marketing', 'future_priority_admin', 'future_priority_finance',
       'future_priority_logistics', 'future_priority_technical', 'future_priority_others',
       'future_other_specify']
)

# Parse the three date columns; values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
date_columns = ['start_date', 'end_date', 'record_date']
df[date_columns] = df[date_columns].apply(pd.to_datetime, errors='coerce')

In [None]:
# Work on an independent copy: a plain `data = df` only binds a second name to
# the same DataFrame, so translating columns of `data` would also overwrite the
# raw text held in `df` and make this cell non-repeatable.
data = df.copy()

# Shared googletrans client used by translate_column.
translator = Translator()

# Free-text columns that are translated from Arabic to English below.
arabic_columns = ['business_name', 'products_services', 'current_other_specify', 'future_other_specify']
def translate_column(column, translate_fn=None):
    """Translate the non-null entries of a Series from Arabic to English.

    Every distinct non-null value is translated exactly once and the result is
    reused for all rows holding that value, so repeated entries do not trigger
    repeated translator calls.

    Parameters
    ----------
    column : pandas.Series
        Values to translate. Entries for which ``pd.notnull`` is False are
        passed through unchanged.
    translate_fn : callable, optional
        Function taking one source value and returning its translation.
        When omitted, the module-level ``translator`` is called with
        ``src='ar', dest='en'`` and the ``.text`` of its result is used.

    Returns
    -------
    pandas.Series
        A new Series with the same index as ``column``; the input is not
        modified.
    """
    if translate_fn is None:
        def translate_fn(text):
            return translator.translate(text, src='ar', dest='en').text

    # Translate each distinct value once; unique() keeps first-appearance order.
    translations = {value: translate_fn(value) for value in column.dropna().unique()}
    return column.apply(lambda x: translations[x] if pd.notnull(x) else x)

# Overwrite each listed free-text column of `data` with the output of
# translate_column, one column at a time.
for col in arabic_columns:
    data[col] = data[col].pipe(translate_column)

# Preview the first rows once the columns have been replaced.
data.head()

In [34]:
# Display the first rows of `data` (DataFrame.head shows 5 rows by default).
data.head()

Unnamed: 0,id,start_date,end_date,record_date,lat,lng,governorate,district,registered,registration_type,...,future_cust_service,future_sales,future_priority_it,future_priority_marketing,future_priority_admin,future_priority_finance,future_priority_logistics,future_priority_technical,future_priority_others,future_other_specify
0,376583921,2024-08-28,2024-08-28,2024-08-28,30.518529,35.57051,Maan,Ash-Shobek,Yes,Sole proprietorship,...,0,0,0,0,0,0,0,0,1,ميكانيكي سيارات /عامل غيار زيوت
1,376583930,2024-08-28,2024-08-28,2024-08-28,30.518472,35.570386,Maan,Ash-Shobek,Yes,Sole proprietorship,...,0,0,0,0,0,0,0,0,1,حداد
2,376583936,2024-08-28,2024-08-28,2024-08-28,30.518922,35.573605,Maan,Ash-Shobek,Yes,Sole proprietorship,...,0,1,0,0,0,0,0,0,0,
3,376583943,2024-08-28,2024-08-28,2024-08-28,30.518988,35.573454,Maan,Ash-Shobek,Yes,Sole proprietorship,...,0,0,1,0,0,0,0,0,0,
4,376583950,2024-08-28,2024-08-28,2024-08-28,30.519855,35.57486,Maan,Ash-Shobek,Yes,Cooperative,...,0,0,0,0,0,0,0,0,1,بائع كاشير /خباز


In [35]:
# Print a per-column summary of `df`: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1760 entries, 0 to 1759
Data columns (total 54 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   id                          1760 non-null   int64         
 1   start_date                  1760 non-null   datetime64[ns]
 2   end_date                    1760 non-null   datetime64[ns]
 3   record_date                 1760 non-null   datetime64[ns]
 4   lat                         1760 non-null   float64       
 5   lng                         1760 non-null   float64       
 6   governorate                 1760 non-null   object        
 7   district                    1760 non-null   object        
 8   registered                  1760 non-null   object        
 9   registration_type           1524 non-null   object        
 10  business_name               1760 non-null   object        
 11  economic_sector             1760 non-null   object      

In [37]:
# Number of rows (businesses) per district; value_counts orders them from the
# most to the least frequent.
district_counts = (
    df['district']
    .value_counts()
    .rename_axis('district')
    .reset_index(name='business_count')
)

# Mean latitude and longitude of the rows belonging to each district.
central_coords = (
    df.groupby('district')[['lat', 'lng']]
    .mean()
    .reset_index()
    .rename(columns={'lat': 'Central Latitude', 'lng': 'Central Longitude'})
)

# One row per district: its business count together with its mean coordinates.
district_df = district_counts.merge(central_coords, on='district', how='inner')

district_df

Unnamed: 0,district,business_count,Central Latitude,Central Longitude
0,Petra,151,30.305667,35.476001
1,Fuhais and Mahes,148,32.003366,35.785633
2,Aqaba Al-Qasabah,148,29.534848,35.006126
3,Mafraq Al-qasabeh,145,32.341822,36.210818
4,Jerash Qasabah,137,32.326744,35.893851
5,Ajloun Qasabah,125,32.332715,35.742472
6,Ash-Shobek,117,30.516729,35.552917
7,Al Dlayel,113,32.108437,36.247649
8,Naour,108,31.886057,35.856042
9,Irbid Qasabah,101,32.550231,35.850257


In [30]:
# Create a base map centered around an approximate central location in Jordan
map_folium = folium.Map(location=[31.5852, 35.8623], zoom_start=7)

# Add one marker per district, placed at the district's mean business
# coordinates and labelled with its business count.
# The lookup keys must match district_df's real column names: the count columns
# are snake_case ('district', 'business_count') while the coordinate columns
# are 'Central Latitude' / 'Central Longitude'. Title-case keys such as
# 'District' or 'Business Count' do not exist in district_df and raise KeyError.
for _row_label, row in district_df.iterrows():
    district = row['district']
    business_count = row['business_count']
    lat = row['Central Latitude']
    lng = row['Central Longitude']

    # Marker popup shows "<district>: <count> businesses" when clicked.
    folium.Marker(
        location=[lat, lng],
        popup=f"{district}: {business_count} businesses",
        icon=folium.Icon(color='blue', icon='info-sign')
    ).add_to(map_folium)

# Display the map
map_folium