### DataFrames
A DataFrame stores tabular data in which both the rows and the columns can be labeled.

#### DataFrame from dictionary

In [2]:
import pandas as pd

# Column data for the cars table. The three lists are parallel:
# position i in each list describes the same country.
names = [
    'United States',
    'Australia',
    'Japan',
    'India',
    'Russia',
    'Morocco',
    'Egypt',
]
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

# Map each column label to its list of values; the key order becomes the column order.
cars_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)

# Build the DataFrame (rows get the default 0..6 integer index) and show it.
cars = pd.DataFrame(data=cars_dict)
print(cars)

         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45


#### Set the row labels (index)

In [15]:
# Short country codes to use as row labels, listed in the same order as the rows of cars
row_labels = ['US', 'AUS', 'JPN', 'IN', 'RU', 'MOR', 'EG']

# Replace the row index of cars with these labels (this modifies cars in place)
cars.index = row_labels

# Display the relabeled frame, so the table shown under this cell is actually
# produced by running the cell
print(cars)

           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JPN          Japan         False           588
IN           India         False            18
RU          Russia          True           200
MOR        Morocco          True            70
EG           Egypt          True            45


### Useful exploration
#### Methods
- `.head()` first few rows
- `.info()` column names, data types, and non-null counts (which reveal missing values)
- `.describe()` summary statistics

#### Attributes
- `.shape` tuple of (number of rows, number of columns)
- `.values` the data as a 2D NumPy array
- `.columns` index of the column labels (the column names)
- `.index` index of the row labels: either row numbers or row names

In [14]:
# Preview the first rows of cars (head() returns the first 5 rows by default)
cars.head()

Unnamed: 0,country,drives_right,cars_per_cap
0,United States,True,809
1,Australia,False,731
2,Japan,False,588
3,India,False,18
4,Russia,True,200


In [15]:
# Print a summary of cars: index range, column names, non-null counts, dtypes and memory usage
cars.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   country       7 non-null      object
 1   drives_right  7 non-null      bool  
 2   cars_per_cap  7 non-null      int64 
dtypes: bool(1), int64(1), object(1)
memory usage: 175.0+ bytes


In [16]:
# Dimensions of cars as a (number of rows, number of columns) tuple
cars.shape

(7, 3)

In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of cars
cars.describe()

Unnamed: 0,cars_per_cap
count,7.0
mean,351.571429
std,345.595552
min,18.0
25%,57.5
50%,200.0
75%,659.5
max,809.0


In [18]:
# The contents of cars as a 2D NumPy array (row and column labels are not included)
cars.values

array([['United States', True, 809],
       ['Australia', False, 731],
       ['Japan', False, 588],
       ['India', False, 18],
       ['Russia', True, 200],
       ['Morocco', True, 70],
       ['Egypt', True, 45]], dtype=object)

In [19]:
# Column labels of cars
cars.columns

Index(['country', 'drives_right', 'cars_per_cap'], dtype='object')

In [20]:
# Row labels (the index) of cars
cars.index

Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')

### Save to csv

In [3]:
# Show the frame that is about to be written to disk
print(cars)

# Write cars to cars.csv; index=True stores the row index as the first CSV column
cars.to_csv(path_or_buf='cars.csv', index=True)

         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45


#### Read csv

In [4]:
# Import pandas as pd
import pandas as pd

# Load cars.csv with the default settings. The saved row index is read back
# as an ordinary data column rather than as the index.
cars = pd.read_csv(filepath_or_buffer='cars.csv')

# Show the loaded frame
print(cars)

   Unnamed: 0        country  drives_right  cars_per_cap
0           0  United States          True           809
1           1      Australia         False           731
2           2          Japan         False           588
3           3          India         False            18
4           4         Russia          True           200
5           5        Morocco          True            70
6           6          Egypt          True            45


In [5]:
# Reload cars.csv, this time using the first CSV column (index_col=0) as the
# row index instead of loading it as a data column
cars = pd.read_csv(filepath_or_buffer='cars.csv', index_col=0)

# Show the corrected frame
print(cars)

         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45
