## Understanding NumPy Array

In [1]:
# Build a 1-D ndarray directly from a Python list of integers
import numpy as np

a = np.array([2, 4, 6, 8, 10])
print(a)

[ 2  4  6  8 10]


In [2]:
# arange(start, stop, step): start at 1, stop before 11, advance by 3
import numpy as np

a = np.arange(1, 11, 3)
print(a)

[ 1  4  7 10]


In [170]:
# With a single argument, arange counts from 0 up to (but not including) that value
b = np.arange(10)
print(b)

[0 1 2 3 4 5 6 7 8 9]


In [171]:
# Tour of the array-construction helpers; each takes the desired shape
# (np.eye takes the matrix size instead).
import numpy as np

p = np.zeros((3,3))   # Create a 3x3 array of all zeros
print(p) 

q = np.ones((2,2))    # Create a 2x2 array of all ones
print(q)

r = np.full((2,3), 4)  # Create a 2x3 constant array filled with 4
print(r) 

s = np.eye(4)         # Create a 4x4 identity matrix
print(s) 

t = np.random.random((3,3))  # Create a 3x3 array of random values (no seed set, so values change on every run)
print(t)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[1. 1.]
 [1. 1.]]
[[4 4 4]
 [4 4 4]]
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
[[0.74275188 0.36666642 0.50402548]
 [0.48749342 0.89631741 0.27733584]
 [0.23041019 0.679075   0.8393468 ]]


In [4]:
# Integers 1 through 10 built with arange()
import numpy as np

a = np.arange(1, 11)

# Report the container type and the element type, one per line
print(type(a), a.dtype, sep="\n")

<class 'numpy.ndarray'>
int32


In [8]:
# .T on this array does not change its shape: the printed shape is still (10,)
b = a.T
print(b.shape)

(10,)


In [9]:
# Check the shape of the array
print(a.shape)

(10,)


In [174]:
# A nested list produces a 2-D array: one inner list per row (3 rows, 2 columns)
a = np.array([
    [5, 6],
    [7, 8],
    [9, 10],
])
print(a)

[[ 5  6]
 [ 7  8]
 [ 9 10]]


In [175]:
# .T gives the transpose: rows become columns
print(a.T)

[[ 5  7  9]
 [ 6  8 10]]


In [176]:
# Shape of the array next to the shape of its transpose (the two axes are swapped)
print(a.shape, a.T.shape)

(3, 2) (2, 3)


In [177]:
# Indexing is [row, column] and zero-based: row 0, column 0
print(a[0,0])

5


In [178]:
# Row 0, column 1
print(a[0,1])

6


In [179]:
# Column index 2 does not exist: axis 1 of `a` has size 2, so the only valid column
# indices are 0 and 1 and NumPy raises IndexError. The error is the point of this
# cell, so it is caught and printed rather than left to abort "Restart & Run All".
try:
    print(a[0,2])
except IndexError as err:
    print(f"IndexError: {err}")

IndexError: index 2 is out of bounds for axis 1 with size 2

In [180]:
# Row 1, column 0
print(a[1,0])

7


In [181]:
# Row 1, column 1
print(a[1,1])

8


## NumPy Array Numerical Data Types

In [22]:
# The dtype argument sets the element type explicitly; the values print with a
# trailing dot because they are stored as floats
arr=np.arange(1,11, dtype= np.float32)

print(arr)

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]


In [2]:
import numpy as np

# A complex number cannot be converted to an integer: the conversion raises TypeError.
# The original call went through `np.int`, a deprecated alias of the builtin `int`
# that was removed in NumPy 1.24 (the attribute lookup itself now fails), so the
# builtin is called directly. The error is caught and shown so the cell does not
# abort a full notebook run.
try:
    int(42.0 + 1.j)
except TypeError as err:
    complex_to_int_error = err  # kept so the exception can be inspected afterwards
    print(f"TypeError: {err}")

TypeError: can't convert complex to int

In [183]:
# complex(real, imag) builds a complex number from its two parts
c= complex(42, 1)
print(c)

(42+1j)


In [27]:
# .real and .imag expose the two components of the complex number
print(c.real,c.imag)

42.0 1.0


In [186]:
# Build an integer array and let NumPy infer the element type
import numpy as np

a = np.array([2, 4, 6, 8, 10, 12])

# dtype reports the element type that was inferred
print(a.dtype)

int32


In [191]:
# Create numpy array using arange() function.
# dtype also accepts a one-character type code; 'f' yields floating-point values
var1=np.arange(1,11, dtype='f')

print(var1)

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]


In [192]:
# The character code 'D' requests a complex dtype: every value gets a +0.j imaginary part
print(np.arange(1,6, dtype='D'))

[1.+0.j 2.+0.j 3.+0.j 4.+0.j 5.+0.j]


In [193]:
# The Python builtin float maps to NumPy's float64
print(np.dtype(float))

float64


In [194]:
# Character code 'f' is float32
print(np.dtype('f'))

float32


In [195]:
# Character code 'd' is float64
print(np.dtype('d')) 

float64


In [196]:
# 'f8' is another spelling that resolves to float64
print(np.dtype('f8'))

float64


In [197]:
# A dtype can also be given by its full name as a string
var2=np.array([1,2,3],dtype='float64')

# dtype.char is the one-character code of the dtype
print(var2.dtype.char)

d


In [198]:
# dtype.type is the NumPy scalar class behind the dtype
print(var2.dtype.type)

<class 'numpy.float64'>


## Manipulating Shape of NumPy Array

In [200]:
# Create a 1-D array of 12 elements to reshape in the next cells
arr = np.arange(12)

In [201]:
# Reshape the 12 elements into 4 rows by 3 columns; the result is bound to a new name
new_arr=arr.reshape(4,3)

print(new_arr)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [202]:
# Reshape the same 12 elements into 3 rows by 4 columns
new_arr2=arr.reshape(3,4)

print(new_arr2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [203]:
# Create a 3x3 array holding the values 1 through 9, filled row by row
arr=np.arange(1,10).reshape(3,3)
print(arr)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [42]:
# flatten() lays the 2-D array out as a single 1-D sequence, row after row
print(arr.flatten())

[1 2 3 4 5 6 7 8 9]


In [43]:
# ravel() also returns a contiguous flattened array; here it prints the same values as flatten()
print(arr.ravel())

[1 2 3 4 5 6 7 8 9]


In [44]:
# Transpose the matrix: rows and columns are exchanged
print(arr.transpose())

[[1 4 7]
 [2 5 8]
 [3 6 9]]


In [204]:
# Resize the matrix to 9 rows by 1 column.
# resize() changes `arr` itself (nothing is assigned back), so cells that run after
# this one see the new (9, 1) shape.
arr.resize(9,1)
print(arr)

[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]


## Stacking of NumPy Arrays

In [206]:
# First 3x3 operand for the stacking examples: the values 1 through 9
arr1 = np.arange(1,10).reshape(3,3)
print(arr1)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [211]:
# Second operand: every element of arr1 floor-divided by 2 (// discards the remainder)
arr2 = arr1//2
print(arr2)

[[0 1 1]
 [2 2 3]
 [3 4 4]]


In [212]:
# Horizontal stacking: arr2's columns are appended to the right of arr1's columns
arr3=np.hstack((arr1, arr2))

print(arr3)

[[1 2 3 0 1 1]
 [4 5 6 2 2 3]
 [7 8 9 3 4 4]]


In [215]:
# Horizontal stacking using concatenate() function: axis=1 joins along the columns
# and prints the same result as hstack
arr4=np.concatenate((arr1, arr2), axis=1)
print(arr4)

[[1 2 3 0 1 1]
 [4 5 6 2 2 3]
 [7 8 9 3 4 4]]


In [216]:
# Vertical stacking: arr2's rows are appended below arr1's rows
arr5=np.vstack((arr1, arr2))
print(arr5)

[[1 2 3]
 [4 5 6]
 [7 8 9]
 [0 1 1]
 [2 2 3]
 [3 4 4]]


In [217]:
# Vertical stacking using concatenate(): axis=0 joins along the rows and prints the
# same result as vstack
arr6=np.concatenate((arr1, arr2), axis=0) 
print(arr6)

[[1 2 3]
 [4 5 6]
 [7 8 9]
 [0 1 1]
 [2 2 3]
 [3 4 4]]


In [218]:
# Depth-wise stack: the arrays are joined along a new third axis, so each position
# holds the pair (element of arr1, element of arr2)
arr7=np.dstack((arr1, arr2)) #depth wise stack
print(arr7)

[[[1 0]
  [2 1]
  [3 1]]

 [[4 2]
  [5 2]
  [6 3]]

 [[7 3]
  [8 4]
  [9 4]]]


In [219]:
# Create 1-D array with the values 4, 5, 6 (this rebinds arr1 from the 3x3 example)
arr1 = np.arange(4,7) 
print(arr1)

[4 5 6]


In [220]:
# Create a second 1-D array by doubling every element of arr1
arr2 = 2 * arr1
print(arr2)

[ 8 10 12]


In [221]:
# Create column stack: each 1-D array becomes one column of the 2-D result
arr_col_stack = np.column_stack((arr1,arr2))
print(arr_col_stack)

[[ 4  8]
 [ 5 10]
 [ 6 12]]


In [222]:
# Create row stack: each 1-D array becomes one row of the 2-D result.
# np.row_stack is only an alias of np.vstack and is deprecated since NumPy 2.0,
# so np.vstack is called directly; the result is the same.
arr_row_stack = np.vstack((arr1,arr2))
print(arr_row_stack)

[[ 4  5  6]
 [ 8 10 12]]


## Partitioning NumPy Array

In [7]:
# Create the 3x3 array (values 1 through 9) that the splitting examples cut up
arr=np.arange(1,10).reshape(3,3)
print(arr)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [13]:
# Perform horizontal splitting.
# Split an array into multiple sub-arrays horizontally (column-wise).
# The second argument lists the column positions to cut at: cutting before
# columns 1 and 2 leaves three single-column pieces.
arr_hor_split=np.hsplit(arr, np.array([1, 2]))

print(arr_hor_split)

[array([[1],
       [4],
       [7]]), array([[2],
       [5],
       [8]]), array([[3],
       [6],
       [9]])]


In [12]:
# Vertical split.
# Split an array into multiple sub-arrays vertically (row-wise).
# Cutting before rows 1 and 2 leaves three single-row pieces.
arr_ver_split=np.vsplit(arr, np.array([1, 2]))

print(arr_ver_split)

[array([[1, 2, 3]]), array([[4, 5, 6]]), array([[7, 8, 9]])]


In [17]:
# split with axis=0: an integer second argument asks for that many equal parts,
# here three pieces taken along the rows
arr_split=np.split(arr,3,axis=0)

print(arr_split)

[array([[1, 2, 3]]), array([[4, 5, 6]]), array([[7, 8, 9]])]


In [15]:
# split with axis=1: three equal pieces taken along the columns
np.split(arr,3,axis=1)

[array([[1],
        [4],
        [7]]), array([[2],
        [5],
        [8]]), array([[3],
        [6],
        [9]])]

## Changing Datatype of NumPy Arrays

In [232]:
# Start from a 3x3 integer array
arr = np.arange(1, 10).reshape(3, 3)
print("Integer Array:", arr)

# Convert the elements to float and rebind the name to the result of astype()
arr = arr.astype(float)

# Show the converted array, then confirm its element type
print("Float Array:", arr)
print("Changed Datatype:", arr.dtype)

Integer Array: [[1 2 3]
 [4 5 6]
 [7 8 9]]
Float Array: [[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]
Changed Datatype: float64


In [233]:
# Change datatype of array
# NOTE(review): this repeats the float conversion from the cell above and the
# printed dtype is float64 again — possibly a different target type was meant; confirm.
arr=arr.astype(float)

# Check new data type of array
print(arr.dtype)

float64


In [236]:
# Change datatype of array
# NOTE(review): same float conversion as in the preceding cells; the printed dtype
# is float64 again — confirm whether another target type was intended.
arr=arr.astype(float)

# Check new data type of array
print(arr.dtype)

float64


In [237]:
# Build a 1-D array with the values 1 through 9
arr = np.arange(1, 10)

# tolist() turns the ndarray into a plain Python list
list1 = arr.tolist()
print(list1)

[1, 2, 3, 4, 5, 6, 7, 8, 9]


## Slicing NumPy Array

In [249]:
# Ten consecutive integers, 0 through 9
arr = np.arange(10)

# Show the full array, then only its last two elements (a negative start counts from the end)
print(arr, arr[-2:], sep="\n")

[0 1 2 3 4 5 6 7 8 9]
[8 9]


In [67]:
# start:stop slice — index 3 is included, index 6 is not
print(arr[3:6])

[3 4 5]


In [68]:
# Omitting the stop runs the slice to the end of the array
print(arr[3:])

[3 4 5 6 7 8 9]


In [69]:
# Negative start: the last three elements
print(arr[-3:])

[7 8 9]


In [251]:
# start:stop:step — every second element from index 2 up to (not including) index 8
print(arr[2:8:2])

[2 4 6]


In [252]:
# Every third element starting at index 1, running to the end
print(arr[1::3])

[1 4 7]


## Boolean and Fancy Indexing

In [254]:
# Odd numbers from 21 through 39: arange(start, stop, step) leaves out the stop value
arr = np.arange(21, 41, 2)
print("Orignial Array:\n", arr)

# Indexing with a boolean mask keeps only the elements where the mask is True
print("After Boolean Condition:", arr[arr > 30])

Orignial Array:
 [21 23 25 27 29 31 33 35 37 39]
After Boolean Condition: [31 33 35 37 39]


In [253]:
# Odd numbers from 21 through 39
arr = np.arange(21, 41, 2)
print("Orignial Array:\n", arr)

# Comparing the array with a scalar yields one True/False per element
print(arr > 30)

Orignial Array:
 [21 23 25 27 29 31 33 35 37 39]
[False False False False False  True  True  True  True  True]


In [255]:
# Create NumPy Array: 5 rows by 4 columns holding the values 1 through 20
arr = np.arange(1,21).reshape(5,4)
print("Orignial Array:\n",arr)

# Selecting 2nd and 3rd row: a list of zero-based row indices picks those rows.
# The printed label previously said "1st and 2nd", which did not match indices 1 and 2.
indices = [1,2]
print("Selected 2nd and 3rd Row:\n", arr[indices])

# Selecting 3rd and 4th row (zero-based indices 2 and 3)
indices = [2,3]
print("Selected 3rd and 4th Row:\n", arr[indices])

Orignial Array:
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [17 18 19 20]]
Selected 1st and 2nd Row:
 [[ 5  6  7  8]
 [ 9 10 11 12]]
Selected 3rd and 4th Row:
 [[ 9 10 11 12]
 [13 14 15 16]]


In [256]:
# Create row and column indices.
# The two index arrays are paired element by element, so this picks the single
# elements at (row 1, col 2) and (row 2, col 3) — two values, not a rectangular block.
row = np.array([1, 2])
col = np.array([2, 3])

print("Selected Sub-Array:", arr[row, col])

Selected Sub-Array: [ 7 12]


## Broadcasting arrays

In [257]:
# Create a 2x2 array holding the values 1 through 4
arr1 = np.arange(1,5).reshape(2,2) 
print(arr1)

[[1 2]
 [3 4]]


In [258]:
# Create another 2x2 array holding the values 5 through 8
arr2 = np.arange(5,9).reshape(2,2) 
print(arr2)

[[5 6]
 [7 8]]


In [259]:
# Add two matrices: + adds the elements at matching positions
print(arr1+arr2)

[[ 6  8]
 [10 12]]


In [84]:
# Multiply two arrays element by element. Note that * is NOT the matrix product:
# each output value is the product of the two elements at the same position.
# (A true matrix product would be written arr1 @ arr2.)
print(arr1*arr2)

[[ 5 12]
 [21 32]]


In [85]:
# Add a scalar value: the scalar is applied to every element of the array
print(arr1 + 3)

[[4 5]
 [6 7]]


In [86]:
# Multiply with a scalar value: every element is multiplied by 3
print(arr1 * 3)

[[ 3  6]
 [ 9 12]]


## Create DataFrame

In [18]:
# Bring in pandas under its conventional alias
import pandas as pd

# A DataFrame built without arguments has no rows and no columns
df = pd.DataFrame()

# Preview the (empty) frame
df.head()

In [261]:
# A bare name as the last line of a cell displays the object
df

In [262]:
# Column-oriented input: each dictionary key becomes a column, each list its values
data = {
    'Name': ['Vijay', 'Sundar', 'Satyam', 'Indira'],
    'Age': [23, 45, 46, 52],
}

# Build the DataFrame from the dictionary
df = pd.DataFrame(data)

# Preview the first rows
df.head()

Unnamed: 0,Name,Age
0,Vijay,23
1,Sundar,45
2,Satyam,46
3,Indira,52


In [264]:
# Row-oriented input: one dictionary per record
data = [
    {'Name': 'Vijay', 'Age': 23},
    {'Name': 'Sundar', 'Age': 25},
    {'Name': 'Shankar', 'Age': 26},
]

# The columns argument names the keys to use and fixes their order
df = pd.DataFrame(data, columns=['Name', 'Age'])

# Preview the first rows
df.head()

Unnamed: 0,Name,Age
0,Vijay,23
1,Sundar,25
2,Shankar,26


In [265]:
# Each tuple is one record; the column labels are supplied separately
data = [
    ('Vijay', 23),
    ('Sundar', 45),
    ('Satyam', 46),
    ('Indira', 52),
]
df = pd.DataFrame(data, columns=['Name', 'Age'])

# Preview the first rows
df.head()

Unnamed: 0,Name,Age
0,Vijay,23
1,Sundar,45
2,Satyam,46
3,Indira,52


## Pandas Series

In [266]:
# Dictionary keys become the index labels of the Series, the values its data
dict1 = {
    0: 'Ajay',
    1: 'Jay',
    2: 'Vijay',
}
series = pd.Series(dict1)

# Display the Series
series

0     Ajay
1      Jay
2    Vijay
dtype: object

In [268]:
# Both libraries are needed: NumPy for the source array, pandas for the Series
import numpy as np
import pandas as pd

# Source values as a NumPy array
arr = np.array([51, 65, 48, 59, 68])

# Wrap the array in a Series and display it
series = pd.Series(arr)
series

0    51
1    65
2    48
3    59
4    68
dtype: int32

In [269]:
# Imports kept from the original cell (NumPy itself is not used below)
import numpy as np
import pandas as pd

# A single scalar is repeated once for every label given in the index
series = pd.Series(
    10,
    index=[0, 1, 2, 3, 4, 5],
)
series

0    10
1    10
2    10
3    10
4    10
5    10
dtype: int64

In [3]:
# Import pandas, plus the stdlib helpers used to build the file path
import os
from pathlib import Path

import pandas as pd

# Folder holding the course CSV files. The original absolute path stays the default
# so existing setups keep working; set the CLASS3_DATA_DIR environment variable to
# point the notebook at another location instead of editing the code.
DATA_DIR = Path(os.environ.get(
    "CLASS3_DATA_DIR",
    "C:/Users/USER/Desktop/BETKOT Municipality Program/YASH SLIDES/Class3_NumPy_Plot",
))

# Load data using read_csv()
df = pd.read_csv(DATA_DIR / "WHO_first9cols.csv")

# Show initial 5 records
df.head()

Unnamed: 0,Country,CountryID,Continent,Adolescent fertility rate (%),Adult literacy rate (%),Gross national income per capita (PPP international $),Net primary school enrolment ratio female (%),Net primary school enrolment ratio male (%),Population (in thousands) total
0,Afghanistan,1,1,151.0,28.0,,,,26088.0
1,Albania,2,2,27.0,98.7,6000.0,93.0,94.0,3172.0
2,Algeria,3,3,6.0,69.9,5940.0,94.0,96.0,33351.0
3,Andorra,4,2,,,,83.0,83.0,74.0
4,Angola,5,3,146.0,67.4,3890.0,49.0,51.0,16557.0


In [2]:
# Show last 5 records (tail() with no argument returns five rows)
df.tail()

Unnamed: 0,Country,CountryID,Continent,Adolescent fertility rate (%),Adult literacy rate (%),Gross national income per capita (PPP international $),Net primary school enrolment ratio female (%),Net primary school enrolment ratio male (%),Population (in thousands) total
197,Vietnam,198,6,25.0,90.3,2310.0,91.0,96.0,86206.0
198,West Bank and Gaza,199,1,,,,,,
199,Yemen,200,1,83.0,54.1,2090.0,65.0,85.0,21732.0
200,Zambia,201,3,161.0,68.0,1140.0,94.0,90.0,11696.0
201,Zimbabwe,202,3,101.0,89.5,,88.0,87.0,13228.0


In [3]:
# Display the whole DataFrame; the recorded output is abbreviated with a "..." row
# in the middle rather than listing every record
df

Unnamed: 0,Country,CountryID,Continent,Adolescent fertility rate (%),Adult literacy rate (%),Gross national income per capita (PPP international $),Net primary school enrolment ratio female (%),Net primary school enrolment ratio male (%),Population (in thousands) total
0,Afghanistan,1,1,151.0,28.0,,,,26088.0
1,Albania,2,2,27.0,98.7,6000.0,93.0,94.0,3172.0
2,Algeria,3,3,6.0,69.9,5940.0,94.0,96.0,33351.0
3,Andorra,4,2,,,,83.0,83.0,74.0
4,Angola,5,3,146.0,67.4,3890.0,49.0,51.0,16557.0
...,...,...,...,...,...,...,...,...,...
197,Vietnam,198,6,25.0,90.3,2310.0,91.0,96.0,86206.0
198,West Bank and Gaza,199,1,,,,,,
199,Yemen,200,1,83.0,54.1,2090.0,65.0,85.0,21732.0
200,Zambia,201,3,161.0,68.0,1140.0,94.0,90.0,11696.0


In [4]:
# Show the shape of DataFrame as (number of rows, number of columns)
print("Shape:", df.shape)

Shape: (202, 9)


In [5]:
# Check the column list of DataFrame (columns is an Index of the column labels)
print("List of Columns:", df.columns)

List of Columns: Index(['Country', 'CountryID', 'Continent', 'Adolescent fertility rate (%)',
       'Adult literacy rate (%)',
       'Gross national income per capita (PPP international $)',
       'Net primary school enrolment ratio female (%)',
       'Net primary school enrolment ratio male (%)',
       'Population (in thousands) total'],
      dtype='object')


In [6]:
# Show the datatypes of columns; the text column 'Country' is reported as object
print("Data types:", df.dtypes)

Data types: Country                                                    object
CountryID                                                   int64
Continent                                                   int64
Adolescent fertility rate (%)                             float64
Adult literacy rate (%)                                   float64
Gross national income per capita (PPP international $)    float64
Net primary school enrolment ratio female (%)             float64
Net primary school enrolment ratio male (%)               float64
Population (in thousands) total                           float64
dtype: object


In [10]:
# Select a single column by its label; the result is a pandas Series
country_series=df['Country']


In [11]:
# Check the Python type of the selected column
type(country_series)

pandas.core.series.Series

In [12]:
# The Series keeps the row labels of the DataFrame it was taken from
print(country_series.index)

RangeIndex(start=0, stop=202, step=1)


In [13]:
# Get the underlying values of the Series. Note that .values gives a NumPy array
# (printed without commas), not a Python list; use .tolist() when a list is needed.
print(country_series.values)

['Afghanistan' 'Albania' 'Algeria' 'Andorra' 'Angola'
 'Antigua and Barbuda' 'Argentina' 'Armenia' 'Australia' 'Austria'
 'Azerbaijan' 'Bahamas' 'Bahrain' 'Bangladesh' 'Barbados' 'Belarus'
 'Belgium' 'Belize' 'Benin' 'Bermuda' 'Bhutan' 'Bolivia'
 'Bosnia and Herzegovina' 'Botswana' 'Brazil' 'Brunei Darussalam'
 'Bulgaria' 'Burkina Faso' 'Burundi' 'Cambodia' 'Cameroon' 'Canada'
 'Cape Verde' 'Central African Republic' 'Chad' 'Chile' 'China' 'Colombia'
 'Comoros' 'Congo, Dem. Rep.' 'Congo, Rep.' 'Cook Islands' 'Costa Rica'
 "Cote d'Ivoire" 'Croatia' 'Cuba' 'Cyprus' 'Czech Republic' 'Denmark'
 'Djibouti' 'Dominica' 'Dominican Republic' 'Ecuador' 'Egypt'
 'El Salvador' 'Equatorial Guinea' 'Eritrea' 'Estonia' 'Ethiopia' 'Fiji'
 'Finland' 'France' 'French Polynesia' 'Gabon' 'Gambia' 'Georgia'
 'Germany' 'Ghana' 'Greece' 'Grenada' 'Guatemala' 'Guinea' 'Guinea-Bissau'
 'Guyana' 'Haiti' 'Honduras' 'Hong Kong, China' 'Hungary' 'Iceland'
 'India' 'Indonesia' 'Iran (Islamic Republic of)' 'Iraq' 'I

In [14]:
# Name of the Series: it carries the label of the column it was selected from
print(country_series.name)

Country


In [18]:
# Pandas Series Slicing: positions 150 through 154 (the stop value 155 is excluded)
country_series[150:155]

150                              Rwanda
151               Saint Kitts and Nevis
152                         Saint Lucia
153    Saint Vincent and the Grenadines
154                               Samoa
Name: Country, dtype: object

## Statistics

In [20]:
# Import pandas, plus the stdlib helpers used to build the file path
import os
from pathlib import Path

import pandas as pd

# Folder holding the course CSV files. The original absolute path stays the default
# so existing setups keep working; set the CLASS3_DATA_DIR environment variable to
# point the notebook at another location instead of editing the code.
DATA_DIR = Path(os.environ.get(
    "CLASS3_DATA_DIR",
    "C:/Users/USER/Desktop/BETKOT Municipality Program/YASH SLIDES/Class3_NumPy_Plot",
))

# Load data using read_csv()
df = pd.read_csv(DATA_DIR / "WHO_first9cols.csv")

# Show initial 3 records
df.head(3)

Unnamed: 0,Country,CountryID,Continent,Adolescent fertility rate (%),Adult literacy rate (%),Gross national income per capita (PPP international $),Net primary school enrolment ratio female (%),Net primary school enrolment ratio male (%),Population (in thousands) total
0,Afghanistan,1,1,151.0,28.0,,,,26088.0
1,Albania,2,2,27.0,98.7,6000.0,93.0,94.0,3172.0
2,Algeria,3,3,6.0,69.9,5940.0,94.0,96.0,33351.0


In [23]:
# (rows, columns) of the loaded dataset
df.shape

(202, 9)

In [24]:
# Describe the dataset: count, mean, std, min, quartiles and max for each numeric
# column (the text column 'Country' does not appear in the summary)
df.describe()

Unnamed: 0,CountryID,Continent,Adolescent fertility rate (%),Adult literacy rate (%),Gross national income per capita (PPP international $),Net primary school enrolment ratio female (%),Net primary school enrolment ratio male (%),Population (in thousands) total
count,202.0,202.0,177.0,131.0,178.0,179.0,179.0,189.0
mean,101.5,3.579208,59.457627,78.871756,11250.11236,84.03352,85.698324,34099.64
std,58.456537,1.808263,49.105286,20.41576,12586.753417,17.788047,15.451212,131837.7
min,1.0,1.0,0.0,23.6,260.0,6.0,11.0,2.0
25%,51.25,2.0,19.0,68.4,2112.5,79.0,79.5,1328.0
50%,101.5,3.0,46.0,86.5,6175.0,90.0,90.0,6640.0
75%,151.75,5.0,91.0,95.3,14502.5,96.0,96.0,20971.0
max,202.0,7.0,199.0,99.8,60870.0,100.0,100.0,1328474.0


In [133]:
# Count the number of observations per column; columns with missing entries
# report fewer than the 202 rows
df.count()

Country                                                   202
CountryID                                                 202
Continent                                                 202
Adolescent fertility rate (%)                             177
Adult literacy rate (%)                                   131
Gross national income per capita (PPP international $)    178
Net primary school enrolment ratio female (%)             179
Net primary school enrolment ratio male (%)               179
Population (in thousands) total                           189
dtype: int64

In [25]:
# Compute median of all the numeric columns.
# numeric_only=True explicitly skips the text column 'Country'. Relying on pandas
# to drop it silently is deprecated (the recorded run emitted a warning) and raises
# TypeError in pandas 2.0 and later.
df.median(numeric_only=True)

  df.median()


CountryID                                                  101.5
Continent                                                    3.0
Adolescent fertility rate (%)                               46.0
Adult literacy rate (%)                                     86.5
Gross national income per capita (PPP international $)    6175.0
Net primary school enrolment ratio female (%)               90.0
Net primary school enrolment ratio male (%)                 90.0
Population (in thousands) total                           6640.0
dtype: float64

In [26]:
# Compute minimum of all the columns; the text column 'Country' is included and
# yields its alphabetically first entry
df.min()

Country                                                   Afghanistan
CountryID                                                           1
Continent                                                           1
Adolescent fertility rate (%)                                     0.0
Adult literacy rate (%)                                          23.6
Gross national income per capita (PPP international $)          260.0
Net primary school enrolment ratio female (%)                     6.0
Net primary school enrolment ratio male (%)                      11.0
Population (in thousands) total                                   2.0
dtype: object

In [27]:
# Compute maximum of all the columns; the text column 'Country' is included and
# yields its alphabetically last entry
df.max()

Country                                                    Zimbabwe
CountryID                                                       202
Continent                                                         7
Adolescent fertility rate (%)                                 199.0
Adult literacy rate (%)                                        99.8
Gross national income per capita (PPP international $)      60870.0
Net primary school enrolment ratio female (%)                 100.0
Net primary school enrolment ratio male (%)                   100.0
Population (in thousands) total                           1328474.0
dtype: object

In [28]:
# Compute standard deviation of all the numeric columns.
# numeric_only=True explicitly skips the text column 'Country'. Relying on pandas
# to drop it silently is deprecated (the recorded run emitted a warning) and raises
# TypeError in pandas 2.0 and later.
df.std(numeric_only=True)


  df.std()


CountryID                                                     58.456537
Continent                                                      1.808263
Adolescent fertility rate (%)                                 49.105286
Adult literacy rate (%)                                       20.415760
Gross national income per capita (PPP international $)     12586.753417
Net primary school enrolment ratio female (%)                 17.788047
Net primary school enrolment ratio male (%)                   15.451212
Population (in thousands) total                           131837.708677
dtype: float64

## Grouping Pandas DataFrames

In [29]:
# Reminder of the data layout before grouping: the first five rows
df.head()

Unnamed: 0,Country,CountryID,Continent,Adolescent fertility rate (%),Adult literacy rate (%),Gross national income per capita (PPP international $),Net primary school enrolment ratio female (%),Net primary school enrolment ratio male (%),Population (in thousands) total
0,Afghanistan,1,1,151.0,28.0,,,,26088.0
1,Albania,2,2,27.0,98.7,6000.0,93.0,94.0,3172.0
2,Algeria,3,3,6.0,69.9,5940.0,94.0,96.0,33351.0
3,Andorra,4,2,,,,83.0,83.0,74.0
4,Angola,5,3,146.0,67.4,3890.0,49.0,51.0,16557.0


In [31]:
# Group the DataFrame by the Continent column and average every numeric column
# within each group.
# numeric_only=True explicitly leaves out the text column 'Country'. Relying on
# pandas to drop it silently is deprecated (the recorded run emitted a warning) and
# raises TypeError in pandas 2.0 and later.
df.groupby('Continent').mean(numeric_only=True)

  df.groupby('Continent').mean()


Unnamed: 0_level_0,CountryID,Adolescent fertility rate (%),Adult literacy rate (%),Gross national income per capita (PPP international $),Net primary school enrolment ratio female (%),Net primary school enrolment ratio male (%),Population (in thousands) total
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,110.238095,37.3,76.9,14893.529412,85.789474,88.315789,16843.35
2,100.333333,20.5,97.911538,19777.083333,92.911111,93.088889,17259.627451
3,99.354167,111.644444,61.690476,3050.434783,67.574468,72.021277,16503.195652
4,56.285714,49.6,91.6,24524.0,95.0,94.4,73577.333333
5,94.774194,77.888889,87.940909,7397.142857,89.137931,88.517241,15637.241379
6,121.228571,39.26087,87.607143,12167.2,89.04,89.96,25517.142857
7,80.777778,57.333333,69.8125,2865.555556,85.444444,88.888889,317683.666667


In [140]:
# Mean adult literacy rate per continent.
# The column is selected BEFORE aggregating, so only that column is averaged. The
# original averaged every column and then picked one, which triggered the
# nuisance-column warning seen in the recorded run and fails on the text column
# 'Country' in pandas 2.0 and later. The resulting Series is the same.
df.groupby('Continent')['Adult literacy rate (%)'].mean()

  df.groupby('Continent').mean()['Adult literacy rate (%)']


Continent
1    76.900000
2    97.911538
3    61.690476
4    91.600000
5    87.940909
6    87.607143
7    69.812500
Name: Adult literacy rate (%), dtype: float64

## Joins

In [21]:
# Import pandas, plus the stdlib helpers used to build the file path
import os
from pathlib import Path

import pandas as pd

# Folder holding the course CSV files. The original absolute path stays the default
# so existing setups keep working; set the CLASS3_DATA_DIR environment variable to
# point the notebook at another location instead of editing the code.
DATA_DIR = Path(os.environ.get(
    "CLASS3_DATA_DIR",
    "C:/Users/USER/Desktop/BETKOT Municipality Program/YASH SLIDES/Class3_NumPy_Plot",
))

# Load data using read_csv()
dest = pd.read_csv(DATA_DIR / "dest.csv")

# Show DataFrame
dest.head()

Unnamed: 0,EmpNr,Dest
0,5,The Hague
1,3,Amsterdam
2,9,Rotterdam


In [22]:
# Stdlib helpers used to build the file path
import os
from pathlib import Path

# Folder holding the course CSV files. The original absolute path stays the default
# so existing setups keep working; set the CLASS3_DATA_DIR environment variable to
# point the notebook at another location instead of editing the code.
DATA_DIR = Path(os.environ.get(
    "CLASS3_DATA_DIR",
    "C:/Users/USER/Desktop/BETKOT Municipality Program/YASH SLIDES/Class3_NumPy_Plot",
))

# Load data using read_csv()
tips = pd.read_csv(DATA_DIR / "tips.csv")

# Show DataFrame
tips.head()

Unnamed: 0,EmpNr,Amount
0,5,10.0
1,9,5.0
2,7,2.5


In [273]:
# Join DataFrames using Inner Join: only the EmpNr values present in BOTH frames
# are kept (5 and 9 in the recorded output)
df_inner= pd.merge(dest, tips, on='EmpNr', how='inner')
df_inner.head()

Unnamed: 0,EmpNr,Dest,Amount
0,5,The Hague,10.0
1,9,Rotterdam,5.0


In [274]:
# Join DataFrames using Outer Join: every EmpNr from either frame is kept; where one
# side has no matching row, its column is left as a missing value
df_outer= pd.merge(dest, tips, on='EmpNr', how='outer')
df_outer.head()

Unnamed: 0,EmpNr,Dest,Amount
0,5,The Hague,10.0
1,3,Amsterdam,
2,9,Rotterdam,5.0
3,7,,2.5


In [275]:
# Join DataFrames using Right Outer Join: every row of the right frame (tips) is
# kept; Dest is missing where dest has no matching EmpNr
df_right= pd.merge(dest, tips, on='EmpNr', how='right')
df_right

Unnamed: 0,EmpNr,Dest,Amount
0,5,The Hague,10.0
1,9,Rotterdam,5.0
2,7,,2.5


In [276]:
# Join DataFrames using Left Outer Join: every row of the left frame (dest) is
# kept; Amount is missing where tips has no matching EmpNr
df_left= pd.merge(dest, tips, on='EmpNr', how='left')
df_left

Unnamed: 0,EmpNr,Dest,Amount
0,5,The Hague,10.0
1,3,Amsterdam,
2,9,Rotterdam,5.0


## Missing Values

In [23]:
# Import pandas, plus the stdlib helpers used to build the file path
import os
from pathlib import Path

import pandas as pd

# Folder holding the course CSV files. The original absolute path stays the default
# so existing setups keep working; set the CLASS3_DATA_DIR environment variable to
# point the notebook at another location instead of editing the code.
DATA_DIR = Path(os.environ.get(
    "CLASS3_DATA_DIR",
    "C:/Users/USER/Desktop/BETKOT Municipality Program/YASH SLIDES/Class3_NumPy_Plot",
))

# Load data using read_csv()
df = pd.read_csv(DATA_DIR / "WHO_first9cols.csv")

# Show initial 5 records
df.head()

Unnamed: 0,Country,CountryID,Continent,Adolescent fertility rate (%),Adult literacy rate (%),Gross national income per capita (PPP international $),Net primary school enrolment ratio female (%),Net primary school enrolment ratio male (%),Population (in thousands) total
0,Afghanistan,1,1,151.0,28.0,,,,26088.0
1,Albania,2,2,27.0,98.7,6000.0,93.0,94.0,3172.0
2,Algeria,3,3,6.0,69.9,5940.0,94.0,96.0,33351.0
3,Andorra,4,2,,,,83.0,83.0,74.0
4,Angola,5,3,146.0,67.4,3890.0,49.0,51.0,16557.0


In [35]:
# Count missing values in DataFrame, per column, using the module-level function:
# isnull marks each missing cell and sum() adds the marks up column by column
pd.isnull(df).sum()

Country                                                    0
CountryID                                                  0
Continent                                                  0
Adolescent fertility rate (%)                             25
Adult literacy rate (%)                                   71
Gross national income per capita (PPP international $)    24
Net primary school enrolment ratio female (%)             23
Net primary school enrolment ratio male (%)               23
Population (in thousands) total                           13
dtype: int64

In [36]:
# Count missing values in DataFrame, per column, using the DataFrame method; the
# recorded counts match those of pd.isnull(df).sum()
df.isnull().sum()

Country                                                    0
CountryID                                                  0
Continent                                                  0
Adolescent fertility rate (%)                             25
Adult literacy rate (%)                                   71
Gross national income per capita (PPP international $)    24
Net primary school enrolment ratio female (%)             23
Net primary school enrolment ratio male (%)               23
Population (in thousands) total                           13
dtype: int64