In [15]:
# Inspect a DataFrame - Shape and Size
import pandas as pd
import numpy as np
# Load the presidents table; the 'name' column becomes the row index so that
# rows can later be looked up by a president's name.
presidents_df = pd.read_csv(
    'datasets/president_heights_party.csv',
    index_col='name',
)

# shape is the (rows, columns) tuple; size is the total number of cells.
print(presidents_df.shape, presidents_df.size, sep='\n')

(45, 4)
180


In [3]:
# Preview the first five rows (five is head()'s default, spelled out here).
presidents_df.head(n=5)

Unnamed: 0_level_0,order,age,height,party
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
George Washington,1,57,189,none
John Adams,2,61,170,federalist
Thomas Jefferson,3,57,189,democratic-republican
James Madison,4,57,163,democratic-republican
James Monroe,5,58,183,democratic-republican


In [4]:
# Print the index range, per-column non-null counts and dtypes, and memory usage.
presidents_df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Index: 45 entries, George Washington to Donald J. Trump
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   order   45 non-null     int64 
 1   age     45 non-null     int64 
 2   height  45 non-null     int64 
 3   party   45 non-null     object
dtypes: int64(3), object(1)
memory usage: 2.8+ KB


In [7]:
# Indexing in pandas: selecting rows by label with .loc

# A single label returns that row as a pandas Series whose entries are the
# frame's columns, hence the one-dimensional shape.
lincoln_row = presidents_df.loc['Abraham Lincoln']
print(lincoln_row)
print(lincoln_row.shape)


order             16
age               52
height           193
party     republican
Name: Abraham Lincoln, dtype: object
(4,)


In [8]:
# A label slice returns a DataFrame; with .loc both endpoints are included.
lincoln_to_grant = slice('Abraham Lincoln', 'Ulysses S. Grant')
print(presidents_df.loc[lincoln_to_grant])

                  order  age  height           party
name                                                
Abraham Lincoln      16   52     193      republican
Andrew Johnson       17   56     178  national union
Ulysses S. Grant     18   46     173      republican


In [9]:
# Rows with .iloc (zero-based integer positions)
# Position 15 alone yields a Series; the half-open slice 15:18 yields a
# DataFrame holding positions 15, 16 and 17.
print(presidents_df.iloc[15], presidents_df.iloc[slice(15, 18)], sep='\n')

order             16
age               52
height           193
party     republican
Name: Abraham Lincoln, dtype: object
                  order  age  height           party
name                                                
Abraham Lincoln      16   52     193      republican
Andrew Johnson       17   56     178  national union
Ulysses S. Grant     18   46     173      republican


In [10]:
# Both .loc[ ] and .iloc[ ] may be used with a boolean array to subset the data.

In [11]:
# Summary statistics for the numeric columns; the quartiles listed are
# describe()'s defaults, written out explicitly.
presidents_df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,order,age,height
count,45.0,45.0,45.0
mean,23.022222,55.0,180.0
std,13.136502,6.595453,6.977236
min,1.0,42.0,163.0
25%,12.0,51.0,175.0
50%,23.0,55.0,182.0
75%,34.0,58.0,183.0
max,45.0,70.0,193.0


In [12]:
# Count the presidents in each party, most frequent party first.
presidents_df.loc[:, 'party'].value_counts()

republican               19
democratic               15
whig                      4
democratic-republican     4
federalist                1
national union            1
none                      1
Name: party, dtype: int64

In [13]:
# Mean of every remaining column (order, age, height) within each party.
presidents_df.groupby(by='party').mean()

Unnamed: 0_level_0,order,age,height
party,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
democratic,26.066667,52.6,181.066667
democratic-republican,4.5,57.25,176.5
federalist,2.0,61.0,170.0
national union,17.0,56.0,178.0
none,1.0,57.0,189.0
republican,29.631579,55.263158,180.894737
whig,11.0,58.25,176.0


In [18]:
# Minimum, median and maximum height within each party.
# All aggregations are requested by name: pandas >= 2.1 emits a FutureWarning
# when a NumPy callable such as np.median is passed to agg(), and the string
# form selects the same built-in groupby median and yields the same column label.
presidents_df.groupby('party')['height'].agg(['min', 'median', 'max'])

Unnamed: 0_level_0,min,median,max
party,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
democratic,168,180,193
democratic-republican,163,177,189
federalist,170,170,170
national union,178,178,178
none,189,189,189
republican,168,182,193
whig,173,174,183


In [19]:
# Per-party summary using a different set of statistics for each column:
# median and mean height, youngest and oldest age.
# Aggregations are named with strings rather than np.median / np.mean / the
# builtins min and max: pandas >= 2.1 warns on those callables, while the
# string names select the same groupby reductions and the same column labels.
print(
    presidents_df.groupby('party').agg({
        'height': ['median', 'mean'],
        'age': ['min', 'max'],
    })
)

                      height             age    
                      median        mean min max
party                                           
democratic               180  181.066667  43  65
democratic-republican    177  176.500000  57  58
federalist               170  170.000000  61  61
national union           178  178.000000  56  56
none                     189  189.000000  57  57
republican               182  180.894737  42  70
whig                     174  176.000000  50  68


**Problem: Data Science - Reshape**  
Task  
Given a list of numbers and the number of rows (r), reshape the list into a 2-dimensional array. Note that r divides the length of the list evenly.

Input Format  
First line: an integer (r) indicating the number of rows of the 2-dimensional array  
Next line: numbers separated by spaces

Output Format  
A NumPy 2D array of values rounded to two decimal places.

Sample Input  
2  <br>
1.2 0 0.5 -1  

Sample Output  
[[ 1.2 0. ]
[ 0.5 -1. ]]  

Explanation
The required number of rows is 2, and we are given a list of 4 numbers; as a result, the 2D array should be 2 x 2. 
So the first row contains the first two numbers, and the second row contains the next two numbers in the given list.

In [20]:
import numpy as np

# First input line: the number of rows; second line: the values, separated by whitespace.
r = int(input())
lst = [float(x) for x in input().split()]
arr = np.array(lst)

# Integer division keeps the column count an exact int (no float round-trip).
# If r does not divide the number of values, reshape raises ValueError.
cols = len(lst) // r

# The required output format is a 2D array rounded to two decimals.
ndarr = arr.reshape(r, cols).round(2)
print(ndarr)

2
1.2 0 0.5 -1
[[ 1.2  0. ]
 [ 0.5 -1. ]]
