##### Import the necessary python libraries.


In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from collections import Counter as cn


##### Create dataframe from stack overflow survey 
##### (Stack overflow survey link, https://insights.stackoverflow.com/survey).

In [2]:
# Load the public survey results; the relative path means the CSV must be
# in the current working directory.
df = pd.read_csv('survey_results_public.csv')


##### Truncate Industry and EdLevel column answers so they fit on graph outputs. Convert YearsCode and YearsCodePro columns to numeric data types by replacing the non-numeric options, then change NaN answers to 0.

In [4]:
def makeNum(x):
    """Replace the survey's textual year bounds with numeric strings.

    'Less than 1 year' becomes '1' and 'More than 50 years' becomes '50';
    every other value is handed back unchanged.
    """
    if x == 'Less than 1 year':
        return '1'
    if x == 'More than 50 years':
        return '50'
    return x

def truncA(x, limit=20):
    """Shorten a long answer so it fits on graph axes and legends.

    Parameters:
        x: a value from a survey column. Non-string values (e.g. NaN for a
            missing answer) are returned untouched.
        limit: maximum number of characters kept before '...' is appended.

    Returns:
        `x` itself when it is not a string or is at most `limit` characters
        long, otherwise its first `limit` characters followed by '...'.
    """
    # Short and missing values must be passed through; previously the
    # `return` was part of the `if` body, so they were mapped to None.
    if not isinstance(x, str) or len(x) <= limit:
        return x
    return x[:limit] + '...'
    
# Shorten the long categorical answers so they fit on the graph outputs.
for col in ('Industry', 'EdLevel'):
    df[col] = df[col].map(truncA)

# Swap the textual year bounds for numbers, parse the columns as numeric
# values, and record missing answers as 0.
for col in ('YearsCode', 'YearsCodePro'):
    df[col] = pd.to_numeric(df[col].map(makeNum)).fillna(0)



##### Create subset grouped by Industry column, summing ConvertedCompYearly to create bar graph showing total annual income based on Industry type.

In [5]:
# Total reported annual compensation per industry.
# The column is selected before aggregating: the first positional argument
# of GroupBy.sum() is `numeric_only`, not a column name, so passing the
# column name there summed every numeric column instead of just this one.
bPay = df.groupby('Industry', as_index=False)['ConvertedCompYearly'].sum()
fig = px.bar(bPay, x='Industry', y='ConvertedCompYearly', color='Industry',
             template='plotly_dark')

fig.show()

##### Create count subset grouped by RemoteWork and Age columns and use subset for a sunburst chart for remote work preference/age.

In [6]:

def truncA(x):
    """Shorten RemoteWork and Age labels for display on the sunburst chart.

    The long hybrid-work answer collapses to 'Hybrid'; labels ending in
    'old' have 'years old' replaced by 'years'. Anything else is returned
    as it came in. Expects a string.
    """
    if x == 'Hybrid (some remote, some in-person)':
        return 'Hybrid'
    if x.endswith('old'):
        return x.replace('years old', 'years')
    return x
# Number of respondents for every (RemoteWork, Age) combination.
rWork = df.groupby(['RemoteWork', 'Age']).size().reset_index(name='Count')

# Shorten both label columns so they fit inside the sunburst segments.
rWork['Age'] = rWork['Age'].map(truncA)
rWork['RemoteWork'] = rWork['RemoteWork'].map(truncA)

fig = px.sunburst(
    rWork,
    path=['RemoteWork', 'Age'],
    values='Count',
    template='plotly_dark',
    title='Age Breakdown By Remote Work Preference',
)
fig.show()

# rWork = df.groupby(['RemoteWork','Age']).size().groupby(level=1).max()       
# array1 = df.groupby('RemoteWork')['Age'].apply(list)




##### Create subsets for both YearsCode and YearsCodePro, summing ConvertedCompYearly to see total income for each.

In [7]:
# Total reported annual compensation per value of YearsCode / YearsCodePro.
# The column is selected before aggregating: the first positional argument
# of GroupBy.sum() is `numeric_only`, not a column name, so passing the
# column name there summed every numeric column instead of just this one.
totInc = df.groupby('YearsCode', as_index=False)['ConvertedCompYearly'].sum()
totInc2 = df.groupby('YearsCodePro', as_index=False)['ConvertedCompYearly'].sum()

# Bars are coloured by the same total they display.
yearsCodeGraph = px.bar(totInc, y='ConvertedCompYearly', x='YearsCode', template='plotly_dark',
                        title='Total Income ($) Based on Total Years Coding', color='ConvertedCompYearly',
                        color_continuous_scale='Inferno', labels={'ConvertedCompYearly': 'Total Income',
                                                                  'YearsCode': 'Years Coding'})
yearsCodeProGraph = px.bar(totInc2, y='ConvertedCompYearly', x='YearsCodePro', template='plotly_dark',
                           title='Total Income ($) Based on Years Coding Professionaly', color='ConvertedCompYearly',
                           color_continuous_scale='Inferno', labels={'ConvertedCompYearly': 'Total Income',
                                                                     'YearsCodePro': 'Years Coding Professionaly'})

yearsCodeGraph.show()
yearsCodeProGraph.show()
# fig = go.Figure(data= yearsCodeGraph.data + yearsCodeProGraph.data)
# fig.layout.template='plotly_dark'



##### Create scatter plots for both YearsCode and YearsCodePro against ConvertedCompYearly to see income distribution as it relates to coding experience. Capping income at 1 million annually to avoid outliers skewing the majority of entries.

In [9]:
# Income against total years coding, coloured by education level, with an
# OLS trendline. The y-axis is capped at 1,000,000 so extreme incomes do not
# flatten the rest of the points.
fig = px.scatter(
    df,
    x='YearsCode',
    y='ConvertedCompYearly',
    color='EdLevel',
    trendline='ols',
    template='plotly_dark',
    title='Annual Income Based on Reported Years Coding',
).update_layout(yaxis_range=[0, 1_000_000])
fig.show()

# Same view for professional coding years (no trendline here).
fig = px.scatter(
    df,
    x='YearsCodePro',
    y='ConvertedCompYearly',
    color='EdLevel',
    template='plotly_dark',
    title='Annual Income Based on Reported Years Coding Pro',
).update_layout(yaxis_range=[0, 1_000_000])
fig.show()

##### Per response, count each instance of possible code learning source (multiple-choice question in the survey), for both online and other sources.

In [331]:
def dFrame(x):
    """Count how often each option appears in a multiple-choice column.

    Parameters:
        x: iterable of answers, each a ';'-separated string of the options
            the respondent selected.

    Returns:
        A DataFrame indexed by option with a single column 'key' holding
        the number of times that option was selected.
    """
    count = cn()
    for answer in x:
        # Skip unanswered rows; stringifying them would add a spurious
        # 'nan' option to the counts (and a slice to the pie charts).
        if pd.isna(answer):
            continue
        count.update(str(answer).split(';'))
    return pd.DataFrame(dict(count), index=['key']).T
   
# Option counts for the general and the online learning-source questions.
lCode = dFrame(df['LearnCode'])
lCodeO = dFrame(df['LearnCodeOnline'])

# Donut charts: one slice per option, sized by how often it was selected.
fig = px.pie(
    lCode,
    names=lCode.index,
    values='key',
    hole=.3,
    template='plotly_dark',
    title='Response Percent for Code Learning Source',
)
fig2 = px.pie(
    lCodeO,
    names=lCodeO.index,
    values='key',
    hole=.3,
    template='plotly_dark',
    title='Response Percent for Online Code Learning Source',
)

fig.show()
fig2.show()

##### Determining what effect education level has on becoming a professional developer and also how education relates to income.
##### Create subset on MainBranch column for responders stating they are pro devs. Use subset to get number of responders, sum of annual income, and median income based on education level.

In [5]:
# Keep only respondents who state they are developers by profession.
subS = df[df['MainBranch'] == 'I am a developer by profession']

# Non-null answer counts per education level; the MainBranch count is what
# the first chart plots.
devMas = subS.groupby(['EdLevel'], as_index=False).count()

# Median and total income per education level. The column is selected before
# aggregating: the first positional argument of GroupBy.median()/sum() is
# `numeric_only`, not a column name, so passing the column name there
# aggregated every numeric column instead of just this one.
devMas1 = subS.groupby('EdLevel', as_index=False)['ConvertedCompYearly'].median()
devMas2 = subS.groupby('EdLevel', as_index=False)['ConvertedCompYearly'].sum()

fig1 = px.bar(devMas, x='EdLevel', y='MainBranch', color='EdLevel', template='plotly_dark',
              title='Total Responces of Pro Developers Based on Education Level')
fig2 = px.bar(devMas2, x='EdLevel', y='ConvertedCompYearly', color='EdLevel', template='plotly_dark',
              title='Sum of Annual Income of Pro Developers Based on Education Level')
fig3 = px.bar(devMas1, x='EdLevel', y='ConvertedCompYearly', color='EdLevel', template='plotly_dark',
              title='Median Annual Income of Pro Developers Based on Education Level')
fig1.show()
fig2.show()
fig3.show()