# Dataframes - Inspecting data

In [1]:
import pandas as pd
import numpy as np

In [2]:
students_score = {
    'Name' : ['Paul', 'Aaron', 'Krista', 'Veronica', 'Paxton', 'Madison', 'Aurora'],
    'Score': [98, 89, 99, 87, 90, 83, 82]
}

In [3]:
df_student = pd.DataFrame(students_score, index=['s1', 's2', 's3', 's4', 's5', 's6', 's7'])

df_student

Unnamed: 0,Name,Score
s1,Paul,98
s2,Aaron,89
s3,Krista,99
s4,Veronica,87
s5,Paxton,90
s6,Madison,83
s7,Aurora,82


## 1. Dataset overview

In [4]:
df_student.shape

(7, 2)

In [42]:
rows, columns = df_student.shape

print(f"Rows : {rows} \nColumns : {columns}")

Rows : 7 
Columns : 2


In [5]:
df_student.head()

Unnamed: 0,Name,Score
s1,Paul,98
s2,Aaron,89
s3,Krista,99
s4,Veronica,87
s5,Paxton,90


In [6]:
df_student.tail()

Unnamed: 0,Name,Score
s3,Krista,99
s4,Veronica,87
s5,Paxton,90
s6,Madison,83
s7,Aurora,82


In [7]:
df_student.columns

Index(['Name', 'Score'], dtype='object')

In [8]:
df_student.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, s1 to s7
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    7 non-null      object
 1   Score   7 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 168.0+ bytes


## 2. Accessing data

In [9]:
df_student['Name']

s1        Paul
s2       Aaron
s3      Krista
s4    Veronica
s5      Paxton
s6     Madison
s7      Aurora
Name: Name, dtype: object

In [10]:
df_student[['Name', 'Score']]

Unnamed: 0,Name,Score
s1,Paul,98
s2,Aaron,89
s3,Krista,99
s4,Veronica,87
s5,Paxton,90
s6,Madison,83
s7,Aurora,82


In [16]:
df_student[:2]

Unnamed: 0,Name,Score
s1,Paul,98
s2,Aaron,89


In [13]:
df_student.iloc[0:2]

Unnamed: 0,Name,Score
s1,Paul,98
s2,Aaron,89


In [25]:
df_student.loc['s2', ['Name', 'Score']]

Name     Aaron
Score       89
Name: s2, dtype: object

In [43]:
df_student.Name

s1        Paul
s2       Aaron
s3      Krista
s4    Veronica
s5      Paxton
s6     Madison
s7      Aurora
Name: Name, dtype: object

In [44]:
df_student.Score

s1    98
s2    89
s3    99
s4    87
s5    90
s6    83
s7    82
Name: Score, dtype: int64

## 3. Basic calculations

In [26]:
df_student['Score'].max()

99

In [31]:
df_student[df_student['Score'] == df_student['Score'].max()]

Unnamed: 0,Name,Score
s3,Krista,99


In [32]:
df_student['Score'].min()

82

In [33]:
df_student[df_student['Score'] == df_student['Score'].max()]

Unnamed: 0,Name,Score
s3,Krista,99


In [34]:
df_student['Score'].mean()

89.71428571428571

In [35]:
df_student[df_student['Score'] >= df_student['Score'].mean()]

Unnamed: 0,Name,Score
s1,Paul,98
s3,Krista,99
s5,Paxton,90


In [46]:
df_student.loc[df_student['Score'] >= df_student['Score'].mean()]

Unnamed: 0,Name,Score
s1,Paul,98
s3,Krista,99
s5,Paxton,90


## 4. Basic statistics & quantities

In [36]:
df_student.describe()

Unnamed: 0,Score
count,7.0
mean,89.714286
std,6.676184
min,82.0
25%,85.0
50%,89.0
75%,94.0
max,99.0


In [37]:
df_student.value_counts()

Name      Score
Aaron     89       1
Aurora    82       1
Krista    99       1
Madison   83       1
Paul      98       1
Paxton    90       1
Veronica  87       1
Name: count, dtype: int64

In [38]:
df_student['Score'].value_counts()

Score
98    1
89    1
99    1
87    1
90    1
83    1
82    1
Name: count, dtype: int64

In [45]:
df_student.count()

Name     7
Score    7
dtype: int64

In [39]:
df_student['Score'].count()

7