In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import plotly

import plotly.graph_objs as go
plotly.offline.init_notebook_mode(connected=True) 
%matplotlib inline
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Read the raw CSV into a pandas DataFrame.
csv_path = "../input/countries of the world.csv"
dataset = pd.read_csv(csv_path)

# shape is (rows, columns), i.e. (number of countries, number of attributes).
print(dataset.shape)

In [None]:
# Replace the original headers with short snake_case names, in column order.
# The list must have exactly one entry per column of the frame.
short_column_names = [
    'country', 'region', 'population', 'area', 'population_density',
    'coastline', 'migration', 'infant_mortality', 'gdp', 'literacy',
    'phones_per_1000', 'arable', 'crops', 'other', 'climate',
    'birthrate', 'deathrate', 'agriculture', 'industry', 'service',
]
dataset.columns = short_column_names

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
dataset.info()

# Per-column dtypes, to spot numeric columns that were read in as text.
print(dataset.dtypes)

A quick scan of the top rows of the dataset shows that the data in a few of the columns is not in the right format.

In [None]:
dataset.head()

Converting the dataset into the right format

In [None]:
# Columns whose values are text with a decimal comma (e.g. "1,5") and have to
# be turned into floats.
column_list = ['population_density', 'coastline', 'migration', 'infant_mortality',
               'literacy', 'phones_per_1000', 'arable', 'crops', 'other', 'climate',
               'birthrate', 'deathrate', 'agriculture', 'industry', 'service']


def column_data(item):
    """Convert column `item` of `dataset` from decimal-comma text to float.

    A column that is already numeric is left untouched. This keeps the cell
    re-runnable: calling `.str` on a float column would raise AttributeError.
    """
    if pd.api.types.is_numeric_dtype(dataset[item]):
        return
    # regex=False: the comma is a literal character, not a pattern.
    dataset[item] = dataset[item].str.replace(',', '.', regex=False).astype(float)


for item in column_list:
    column_data(item)

# Low-cardinality text columns become categoricals; region values are stripped
# of surrounding whitespace first.
# NOTE(review): country is not stripped here - confirm whether its values carry
# the same padding as region.
dataset['country'] = dataset['country'].astype('category')
dataset['region'] = dataset['region'].str.strip().astype('category')

dataset.head()

NaN Values


Looking at the output of `dataset.info()`, we come across a number of columns with 'NaN' values.
My first approach was to fill each 'NaN' with the mean over the region in which the country is located, but I couldn't come up with the right code, so I took the mean over the entire dataset instead.

In [None]:
# Fill every remaining NaN with the overall mean of its column.
# numeric_only=True keeps the categorical country/region columns out of the
# reduction; without it, recent pandas versions raise a TypeError on them.
column_means = dataset.mean(numeric_only=True)

# Assign the result instead of mutating in place so the cell stays re-runnable.
dataset = dataset.fillna(column_means)
dataset.head()

The dataset has been pre-processed. Let's start creating a few data visualisations to identify patterns, correlations and trends. I will try to experiment with a number of graphing libraries for my visualisations.
For the first visualisation, I will use choropleth maps from the Python plotly graphing library. These maps enable us to visualise a measurement at a national or global scale.

In [None]:
# A plotly choropleth is described by a trace dict and a layout dict.
# Trace keys used below:
#   locations    -> the country of each row
#   locationmode -> how plotly should interpret `locations` (country names)
#   z            -> the number that drives each country's colour; must be in
#                   the same row order as `locations`
#   text         -> label attached to each country
#   colorbar     -> title of the colour bar
#   colorscale / reversescale -> Viridis palette, reversed
population_thousands = dataset['population'] / 1000

data = {
    'type': 'choropleth',
    'locations': dataset['country'],
    'locationmode': 'country names',
    'z': population_thousands,
    'text': dataset['country'],
    'colorbar': {'title': 'Population Scale'},
    'colorscale': 'Viridis',
    'reversescale': True,
}

# Layout: figure title plus a frameless "natural earth" projection.
layout = {
    'title': 'Population Spread Across The Globe ',
    'geo': {
        'showframe': False,
        'projection': {'type': 'natural earth'},
    },
}

worldmap = go.Figure(data=[data], layout=layout)
plotly.offline.iplot(worldmap, validate=True)

Similarly we can plot other attributes using Choropleth Map Plots. 

In [None]:
# Bubble plot: literacy (x) against infant mortality (y), one trace per region,
# with marker size derived from the country's population.
axis0 = 'literacy'
axis1 = 'infant_mortality'
trace_items = []
for item in list(dataset['region'].unique().astype(str)):
    # Select the region's rows once instead of rebuilding the mask per field.
    region_rows = dataset[dataset['region'] == item]
    trace_item = go.Scatter(
        x=region_rows[axis0],
        y=region_rows[axis1],
        mode='markers',
        name=item,
        text=region_rows['country'],
        # Cube root (then /10) compresses the population range so the largest
        # countries do not swamp the chart.
        marker=dict(size=list(np.cbrt(region_rows['population']) / 10)),
    )
    trace_items.append(trace_item)

data = trace_items

# Both axis titles share one font. The font is given through the nested
# title=dict(text=..., font=...) form because the `titlefont` shortcut is a
# deprecated alias of `title.font`.
axis_title_font = dict(family='Courier New, monospace', size=18)

layout = go.Layout(
    title='literacy vs Infant_Mortality',
    xaxis=dict(title=dict(text=' Literacy', font=axis_title_font)),
    yaxis=dict(title=dict(text='Infant_Mortality', font=axis_title_font)),
)

fig1 = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig1, show_link=True)