In [298]:
import pandas as pd

## Read the collected information

In [299]:
df_remote_io = pd.read_csv('dataframes/remote-io.csv')
df_we_work_remotely = pd.read_csv('dataframes/we-work-remotely.csv')
df_stackoverflow = pd.read_csv('dataframes/stackoverflow.csv')

## Join DataFrames

In [300]:
df = pd.concat([df_remote_io, df_we_work_remotely],ignore_index = True)
df = pd.concat([df, df_stackoverflow],ignore_index = True)
df

Unnamed: 0,Title,Link,Technologies
0,Generalist Mid/Senior-level Software Develope...,https://www.remote.io/job/4011/generalist-mid-...,"graphql,postgresql,typescript,react,kotlin"
1,ActionScript / FLASH Developer at Surge,https://www.remote.io/job/4010/actionscript-fl...,
2,DevOps Engineer at Corsearch,https://www.remote.io/job/4009/devops-engineer...,"machine learning,bash,amazon web services,dock..."
3,"Full Stack Engineer (.Net, Angular) at Modus ...",https://www.remote.io/job/4008/full-stack-engi...,angular
4,Search Wrangler at Automattic,https://www.remote.io/job/4007/search-wrangler...,"elasticsearch,wordpress,sql,java,scala"
...,...,...,...
2034,Senior Linux Software Engineer,https://stackoverflow.com/jobs/367261/senior-l...,"sql,c++,javascript"
2035,Software Engineer,https://stackoverflow.com/jobs/367258/software...,"sql,c++,javascript"
2036,Elixir/API Engineer with Growing Messaging Com...,https://stackoverflow.com/jobs/335070/elixir-a...,"graphql,elixir,postgresql"
2037,Senior Software Engineer - Big Data/AI,https://stackoverflow.com/jobs/342960/senior-s...,"python,java"


## Dictionary of technologies

In [301]:
technologies_set = df['Technologies']

technologies_dict= {}

for technologies in technologies_set:
    for technology in str(technologies).split(','):
        if technology in technologies_dict.keys():
            technologies_dict[technology] += 1
        else:
            technologies_dict[technology] =  1

technologies_dict_sorted = {k: v for k, v in sorted(technologies_dict.items(), key=lambda item: item[1])}
technologies_dict_sorted.pop('nan')
technologies_dict_sorted

{'unreal engine': 1,
 'matlab': 1,
 'umbraco': 1,
 'orchardcms': 1,
 'assembly': 2,
 'couchbase': 2,
 'cordova': 3,
 'chromium': 3,
 'r': 3,
 'react bootstrap': 4,
 'seo': 4,
 'asp': 5,
 'dart': 6,
 'f#': 6,
 'clojure': 7,
 'mariadb': 7,
 'firebase': 8,
 'canvas': 10,
 'pytorch': 11,
 'microsoft sql server': 12,
 'drupal': 13,
 'c': 13,
 'flutter': 14,
 'xamarin': 14,
 'ux': 15,
 'tensorflow': 16,
 'pandas': 16,
 'elixir': 17,
 'ui': 19,
 'cassandra': 22,
 'shopify': 22,
 'apollo': 23,
 'objective c': 24,
 'powershell': 25,
 'hadoop': 27,
 'golang': 30,
 'rabbitmq': 30,
 'puppet': 32,
 'kotlin': 37,
 'oracle': 37,
 'bash': 42,
 'laravel': 42,
 'heroku': 43,
 'git': 46,
 'c++': 47,
 'dynamodb': 48,
 'google cloud platform': 49,
 'flask': 49,
 'swift': 50,
 'wordpress': 55,
 'aws': 60,
 'jquery': 61,
 'shell': 67,
 'django': 80,
 'ansible': 81,
 'apache': 81,
 'react native': 83,
 'mongodb': 84,
 'graphql': 85,
 'elasticsearch': 91,
 'nosql': 94,
 'machine learning': 104,
 'vue': 115,
 '

## List of top technologies

In [302]:
top_5_techs = list(technologies_dict_sorted.keys())[::-1][:5]
top_5_techs

['javascript', 'sql', 'react', 'python', 'scala']

## Erase all null values from df

In [303]:
df = df.dropna(axis=0, subset=['Technologies'])
df[df['Technologies'].isnull()]
df.isna().sum() 

Title           0
Link            0
Technologies    0
dtype: int64

## Find related technologies for the top 5 languages

In [304]:
first_rows = df[df['Technologies'].str.contains(top_5_techs[0])]
second_rows = df[df['Technologies'].str.contains(top_5_techs[0])]
third_rows = df[df['Technologies'].str.contains(top_5_techs[0])]
fourth_rows = df[df['Technologies'].str.contains(top_5_techs[0])]
fifth_rows = df[df['Technologies'].str.contains(top_5_techs[0])]

## Def get_dummies(df, col_name)

This function:
- Takes an original df and the column to get the dummies from
- Concatenates the original df 
- Erases the column the dummies where created from

In [310]:
def get_dummies(df,col_name):
    new_df = df[col_name].str.get_dummies(sep=",")
    new_df = pd.concat([df,new_df], axis=1)
    del new_df[col_name]
    return new_df

## Get technologies dummies

In [312]:
tech_dummies = get_dummies(df,'Technologies')

tech_dummies

Unnamed: 0,Title,Link,amazon web services,angular,ansible,apache,apollo,asp,assembly,aws,...,swift,tensorflow,typescript,ui,umbraco,unreal engine,ux,vue,wordpress,xamarin
0,Generalist Mid/Senior-level Software Develope...,https://www.remote.io/job/4011/generalist-mid-...,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
2,DevOps Engineer at Corsearch,https://www.remote.io/job/4009/devops-engineer...,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Full Stack Engineer (.Net, Angular) at Modus ...",https://www.remote.io/job/4008/full-stack-engi...,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Search Wrangler at Automattic,https://www.remote.io/job/4007/search-wrangler...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
5,Dveloppeur iOS confirm en tltravail at Tayasui,https://www.remote.io/job/4006/dveloppeur-ios-...,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2034,Senior Linux Software Engineer,https://stackoverflow.com/jobs/367261/senior-l...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2035,Software Engineer,https://stackoverflow.com/jobs/367258/software...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2036,Elixir/API Engineer with Growing Messaging Com...,https://stackoverflow.com/jobs/335070/elixir-a...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2037,Senior Software Engineer - Big Data/AI,https://stackoverflow.com/jobs/342960/senior-s...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Melt DataFrame

In [313]:
melted_df = pd.melt(df_dummies, id_vars = ["Title","Link"])
# id_columns


melted_df = melted_df[melted_df['value'] != 0]
del melted_df['value']
melted_df.columns = ['Title', 'Link', 'Technology']

# Group by variable and aggragate by sum!


In [314]:
melted_df

Unnamed: 0,Title,Link,Technology
1,DevOps Engineer at Corsearch,https://www.remote.io/job/4009/devops-engineer...,amazon web services
14,UI Lead / Front-End Developer (ReactJS / Reac...,https://www.remote.io/job/3990/ui-lead-front-e...,amazon web services
19,"Senior Systems Administrator (Washington, D.C...",https://www.remote.io/job/3981/senior-systems-...,amazon web services
24,DevOps Engineer [100% REMOTE] at Clevertech,https://www.remote.io/job/3971/devops-engineer...,amazon web services
30,DevOps Engineer with Kubernetes (CET) [100% R...,https://www.remote.io/job/3965/devops-engineer...,amazon web services
...,...,...,...
122916,"UI Developer (CSS, HTML and JavaScript)",https://stackoverflow.com/jobs/378493/ui-devel...,xamarin
122935,Senior Software Developer (Istanbul),https://stackoverflow.com/jobs/378170/senior-s...,xamarin
122936,Senior Xamarin Developer,https://stackoverflow.com/jobs/247102/senior-x...,xamarin
122979,Javascript Full Stack Developer (Remote or On ...,https://stackoverflow.com/jobs/304681/javascri...,xamarin


## VISUALIZATION 🙌🏼

In [315]:
import matplotlib.pyplot as plt 
import seaborn as sb

In [332]:
nb_of_techs = 8
top_techs = pd.DataFrame(melted_df['Technology'].value_counts())[:nb_of_techs]


plt.bar(top_techs.index,top_techs.Technology)


plt.show()

AttributeError: 'Index' object has no attribute 'python'

## Get specific technology related techs

In [288]:
top_techs_by_tech = input("Imput technology name:")

Imput technology name:react


In [289]:
filtered_by_tech = df[df['Technologies'].str.contains(top_techs_by_tech)]
filtered_by_tech

Unnamed: 0,Title,Link,Technologies
0,Generalist Mid/Senior-level Software Develope...,https://www.remote.io/job/4011/generalist-mid-...,"graphql,postgresql,typescript,react,kotlin"
6,Senior Back End Engineer at DoubleGDP (San Fr...,https://www.remote.io/job/4005/senior-back-end...,"javascript,rust,redis,ruby,heroku,react"
22,UI Lead / Front-End Developer (ReactJS / Reac...,https://www.remote.io/job/3990/ui-lead-front-e...,"javascript,react native,python,amazon web serv..."
23,"Full Stack Developer at Future PLC (Bath, UK)",https://www.remote.io/job/3989/full-stack-deve...,"javascript,sql,java,react,docker"
30,Mobile Developer - Android/iOS - Freelance at...,https://www.remote.io/job/3982/mobile-develope...,"cordova,flutter,react native,swift,kotlin"
...,...,...,...
2014,Craftsmanship Focused / Test Loving Full Stack...,https://stackoverflow.com/jobs/207146/craftsma...,"django,flask,react,sql,python"
2018,"Software Engineer, IAM Authorization",https://stackoverflow.com/jobs/375370/software...,"golang,python,java,react,laravel,wordpress,mon..."
2019,Senior Product Manager,https://stackoverflow.com/jobs/375356/senior-p...,"vue,react,angular,java"
2025,Trainer (m/w/d) für MS Office Programme,https://stackoverflow.com/jobs/375303/trainer-...,"python,react,elasticsearch,docker,sql,node,jav..."
