# Visualizing Highly Skewed Data

Highly skewed data can produce visualizations in which only one or two bars are visible while all the others appear to be at 0. This can be solved by switching the quantitative axis from a linear scale to a log scale.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime as dt

import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px

# Country GDP

In [None]:
# Load the per-country statistics table from the relative input directory.
country_stats = pd.read_csv(
    '../input/countries-of-the-world/countries of the world.csv'
)

In [None]:
# Total GDP per country = per-capita GDP multiplied by population.
country_stats['GDP'] = country_stats['GDP ($ per capita)'].mul(
    country_stats['Population']
)

In [None]:
# Drop rows without a GDP value, sort ascending by GDP and keep the last
# 50 rows, i.e. the 50 largest GDP values.
df = (
    country_stats
    .dropna(subset=['GDP'])
    .sort_values(by='GDP')
    .tail(50)
)

# Horizontal bar chart on a linear axis, each bar annotated with its GDP value.
fig1 = px.bar(df, x='GDP', y='Country', text='GDP', orientation='h')

# Show the bar annotations outside the bars using the '.2s' number format,
# then apply the overall layout settings.
fig1.update_traces(
    texttemplate='%{text:.2s}',
    textposition='outside',
).update_layout(
    template='simple_white',
    height=1000,
    title='GDP by Country',
)
fig1.show()


In [None]:
# Drop rows without a GDP value, sort ascending by GDP and keep the last
# 50 rows, i.e. the 50 largest GDP values.
df = (
    country_stats
    .dropna(subset=['GDP'])
    .sort_values(by='GDP')
    .tail(50)
)

# Scatter plot on a log-scaled x axis with a fixed range of 1e11 to 3e13.
# Each point is labelled with the country name only.
# NOTE: a combined "<Country> <GDP with thousands separators>" label was
# tried as an alternative to the bare country name.
fig1 = px.scatter(
    df,
    x='GDP',
    y='Country',
    text='Country',
    log_x=True,
    range_x=[10**11, 3 * 10**13],
)

# Labels are drawn to the right of each marker; the y tick labels are
# hidden because the point labels already carry the country names.
fig1.update_traces(textposition='middle right').update_layout(
    template='simple_white',
    height=1000,
    yaxis={'showticklabels': False},
)
fig1.show()


# Presidential Votes

In [None]:
# Load the 2020 presidential results file from the relative input directory.
president_counties_df = pd.read_csv(
    '../input/us-election-2020/president_county_candidate.csv'
)

# Sum 'total_votes' per candidate, turn the result back into a two-column
# frame, and order candidates from most to fewest votes.
total_votes_df = (
    president_counties_df
    .groupby('candidate')['total_votes']
    .sum()
    .reset_index()
    .sort_values(by='total_votes', ascending=False)
)

In [None]:
# Re-sort ascending by vote count for plotting.
df = total_votes_df.sort_values(by='total_votes')

# Horizontal bar chart on a linear axis, each bar annotated with its vote total.
fig3 = px.bar(
    df,
    x='total_votes',
    y='candidate',
    text='total_votes',
    orientation='h',
)

# Show the annotations outside the bars using the '.2s' number format,
# then set the layout: titled x axis, blank y-axis title.
fig3.update_traces(
    texttemplate='%{text:.2s}',
    textposition='outside',
).update_layout(
    template='simple_white',
    title='Total Votes by Candidate 2020 Election',
    height=800,
    xaxis={'title': 'Total Votes'},
    yaxis={'title': ''},
)
fig3.show()


In [None]:
# Re-sort ascending by vote count for plotting (sort_values returns a new
# frame, so adding a column below does not touch total_votes_df).
df = total_votes_df.sort_values(by='total_votes')

# Point label of the form "<candidate> (<votes with thousands separators>)".
# Exact values are hard to read off a log axis, so the label carries the count.
df['text'] = df['candidate'] + ' (' + df['total_votes'].map('{:,}'.format) + ')'

# Scatter plot on a log-scaled x axis with a fixed range of 1 to 1e10.
# Use the computed 'text' column as the label; previously it was built but
# never passed to the plot, so only the candidate name was shown.
fig1 = px.scatter(
    df,
    x='total_votes',
    y='candidate',
    log_x=True,
    range_x=[1, 10000000000],
    text='text',
)

# Labels are drawn to the left of each marker.
fig1.update_traces(textposition='middle left')

# The y tick labels are hidden because the point labels already name the
# candidate.
fig1.update_layout(
    template='simple_white',
    title='Total Votes by Candidate 2020 Election',
    height=800,
    xaxis=dict(title='Total Votes'),
    yaxis=dict(showticklabels=False, title=''),
)
fig1.show()
