

# Data Manipulation - Filters

## Learnings:

- rename columns in a DataFrame
- manipulate columns in a DataFrame (select, reorder, delete)
- filter dataframe
- assign to a column based on a condition

In [1]:
import pandas as pd

data = pd.read_csv('data/vehicles.csv')
data.head(2)
                   

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550


In [2]:
data.shape

(35952, 15)

In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35952 entries, 0 to 35951
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Make                     35952 non-null  object 
 1   Model                    35952 non-null  object 
 2   Year                     35952 non-null  int64  
 3   Engine Displacement      35952 non-null  float64
 4   Cylinders                35952 non-null  float64
 5   Transmission             35952 non-null  object 
 6   Drivetrain               35952 non-null  object 
 7   Vehicle Class            35952 non-null  object 
 8   Fuel Type                35952 non-null  object 
 9   Fuel Barrels/Year        35952 non-null  float64
 10  City MPG                 35952 non-null  int64  
 11  Highway MPG              35952 non-null  int64  
 12  Combined MPG             35952 non-null  int64  
 13  CO2 Emission Grams/Mile  35952 non-null  float64
 14  Fuel Cost/Year        

## Checking the dataframe column names

Rename all columns at once:
- `data.columns` is an **attribute** of the DataFrame which results in a list-like of the column names
    - You can substitute it by another list containing the names you want 
    - Note you have to substitute the whole set of column names at once
    
- `data.rename()` is a **method** of a DataFrame, in which you can rename one column at once
    - You just need to pass a dictionary containing {'old_name':'new_name'} 
    - By default, it changes names of a **index** (`axis=0`), you can specify `axis=1` to change **column** names
    - the `inplace` argument

In [4]:
data.columns

Index(['Make', 'Model', 'Year', 'Engine Displacement', 'Cylinders',
       'Transmission', 'Drivetrain', 'Vehicle Class', 'Fuel Type',
       'Fuel Barrels/Year', 'City MPG', 'Highway MPG', 'Combined MPG',
       'CO2 Emission Grams/Mile', 'Fuel Cost/Year'],
      dtype='object')

### Substituting `.columns` attribute

In [None]:
# say for example we want to convert all columns to lowercase!

In [5]:
data.columns = ['make', 'Model', 'Year', 'Engine Displacement', 'Cylinders',
       'Transmission', 'Drivetrain', 'Vehicle Class', 'Fuel Type',
       'Fuel Barrels/Year', 'City MPG', 'Highway MPG', 'Combined MPG',
       'CO2 Emission Grams/Mile', 'Fuel Cost/Year']

In [6]:
data.head()

Unnamed: 0,make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.4375,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.4375,2550


In [7]:
data.columns = ['make', 'model', 'Year', 'Engine Displacement', 'Cylinders',
               'Transmission', 'Drivetrain', 'Vehicle Class', 'Fuel Type',
               'Fuel Barrels/Year', 'City MPG', 'Highway MPG', 'Combined MPG',
               'CO2 Emission Grams/Mile', 'Fuel Cost/Year']

In [10]:
colnames = []
for col in data.columns:
    colnames.append(col.lower())

In [14]:
[col.lower().replace(' ','_').replace('/','_') for col in data.columns]

['make',
 'model',
 'year',
 'engine_displacement',
 'cylinders',
 'transmission',
 'drivetrain',
 'vehicle_class',
 'fuel_type',
 'fuel_barrels_year',
 'city_mpg',
 'highway_mpg',
 'combined_mpg',
 'co2_emission_grams_mile',
 'fuel_cost_year']

In [24]:
data.columns = [col.lower().replace(' ','_').replace('/','_') for col in data.columns]

In [25]:
data.head()

Unnamed: 0,make,model,year,engine_displacement,cylinders,transmission,drivetrain,vehicle_class,fuel_type,fuel_barrels_year,city_mpg,highway_mpg,combined_mpg,co2_emission_grams_mile,fuel_cost_year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.4375,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.4375,2550


In [15]:
data.columns = ['manufacturer']

ValueError: Length mismatch: Expected axis has 15 elements, new values have 1 elements

### `.rename() method`

`.rename({'old_column':'new_column'})`

#### returning a new dataframe

In [16]:
data.head(1)

Unnamed: 0,make,model,year,engine_displacement,cylinders,transmission,drivetrain,vehicle_class,fuel_type,fuel_barrels_year,city_mpg,highway_mpg,combined_mpg,co2_emission_grams_mile,fuel_cost_year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950


In [28]:
data.rename({'make': 'manufacturer'}, axis=1)

Unnamed: 0,manufacturer,model,year,engine_displacement,cylinders,transmission,drivetrain,vehicle_class,fuel_type,fuel_barrels_year,city_mpg,highway_mpg,combined_mpg,co2_emission_grams_mile,fuel_cost_year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [33]:
data.rename(columns={'make': 'manufacturer', 'year':'model_year'})

Unnamed: 0,manufacturer,model,model_year,engine_displacement,cylinders,transmission,drivetrain,vehicle_class,fuel_type,fuel_barrels_year,city_mpg,highway_mpg,combined_mpg,co2_emission_grams_mile,fuel_cost_year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [34]:
data = data.rename(columns={'make': 'manufacturer', 'year':'model_year'})

In [35]:
data.head()

Unnamed: 0,manufacturer,model,model_year,engine_displacement,cylinders,transmission,drivetrain,vehicle_class,fuel_type,fuel_barrels_year,city_mpg,highway_mpg,combined_mpg,co2_emission_grams_mile,fuel_cost_year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.4375,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.4375,2550


#### inplace

In [36]:
data.rename({'engine_displacement': 'engine_displacement2',
             'vehicle_class': 'vehicle_class2'}, axis=1, inplace=True)

In [37]:
# dataframe already changed
data.head()

Unnamed: 0,manufacturer,model,model_year,engine_displacement2,cylinders,transmission,drivetrain,vehicle_class2,fuel_type,fuel_barrels_year,city_mpg,highway_mpg,combined_mpg,co2_emission_grams_mile,fuel_cost_year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.4375,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.4375,2550


If you try to assign an `inplace=True` command, check what happens:

In [30]:
data.rename({'year3': 'year10'}, axis=1)

Unnamed: 0,manufacturer,model,model_year,engine_displacement2,cylinders,transmission,drivetrain,vehicle_class2,fuel_type,fuel_barrels_year,city_mpg,highway_mpg,combined_mpg,co2_emission_grams_mile,fuel_cost_year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [41]:
data = data.rename({'year': 'year3'}, axis=1, inplace=True)

In [42]:
data.head()

AttributeError: 'NoneType' object has no attribute 'head'

In [43]:
print(data)

None


Two options:
> 1. store it again on the variable `data`: 

    data = data.rename(columns={'Make':'Manufacturer', 'Year':'ANO'})
> 2. Use the inplace argument `inplace =  True` to change the values within the dataframe automatically

    data.rename(columns={'Make':'Manufacturer', 'Year':'ANO'}, inplace=True)
    

In [None]:
# You can also assign to a different variable, of course
renamed_data = data.rename(columns={'make':'Manufacturer', 'year3':'ANO'})

In [None]:
renamed_data.head(2)

In [None]:
data.head(2)

## Reordering columns in a dataframe

>    - Remember you always pass a list of columns to access a dataframe

Just select the columns in a different order and overwrite the previous dataframe

In [51]:
data.head()

Unnamed: 0,make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.4375,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.4375,2550


In [39]:
data[['model', 'manufacturer']]

Unnamed: 0,model,manufacturer
0,DJ Po Vehicle 2WD,AM General
1,FJ8c Post Office,AM General
2,Post Office DJ5 2WD,AM General
3,Post Office DJ8 2WD,AM General
4,GNX,ASC Incorporated
...,...,...
35947,fortwo coupe,smart
35948,fortwo coupe,smart
35949,fortwo coupe,smart
35950,fortwo coupe,smart


In [55]:
data[sorted(data.columns)]

Unnamed: 0,CO2 Emission Grams/Mile,City MPG,Combined MPG,Cylinders,Drivetrain,Engine Displacement,Fuel Barrels/Year,Fuel Cost/Year,Fuel Type,Highway MPG,Model,Transmission,Vehicle Class,Year,make
0,522.764706,18,17,4.0,2-Wheel Drive,2.5,19.388824,1950,Regular,17,DJ Po Vehicle 2WD,Automatic 3-spd,Special Purpose Vehicle 2WD,1984,AM General
1,683.615385,13,13,6.0,2-Wheel Drive,4.2,25.354615,2550,Regular,13,FJ8c Post Office,Automatic 3-spd,Special Purpose Vehicle 2WD,1984,AM General
2,555.437500,16,16,4.0,Rear-Wheel Drive,2.5,20.600625,2100,Regular,17,Post Office DJ5 2WD,Automatic 3-spd,Special Purpose Vehicle 2WD,1985,AM General
3,683.615385,13,13,6.0,Rear-Wheel Drive,4.2,25.354615,2550,Regular,13,Post Office DJ8 2WD,Automatic 3-spd,Special Purpose Vehicle 2WD,1985,AM General
4,555.437500,14,16,6.0,Rear-Wheel Drive,3.8,20.600625,2550,Premium,21,GNX,Automatic 4-spd,Midsize Cars,1987,ASC Incorporated
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,244.000000,34,36,3.0,Rear-Wheel Drive,1.0,9.155833,1100,Premium,38,fortwo coupe,Auto(AM5),Two Seaters,2013,smart
35948,243.000000,34,36,3.0,Rear-Wheel Drive,1.0,9.155833,1100,Premium,38,fortwo coupe,Auto(AM5),Two Seaters,2014,smart
35949,244.000000,34,36,3.0,Rear-Wheel Drive,1.0,9.155833,1100,Premium,38,fortwo coupe,Auto(AM5),Two Seaters,2015,smart
35950,246.000000,34,36,3.0,Rear-Wheel Drive,0.9,9.155833,1100,Premium,39,fortwo coupe,Auto(AM6),Two Seaters,2016,smart


In [56]:
data = pd.read_csv('data/vehicles.csv')

In [57]:
data.columns = [col.lower().replace(' ','_').replace('/','_') for col in data.columns]

In [58]:
data.columns

Index(['make', 'model', 'year', 'engine_displacement', 'cylinders',
       'transmission', 'drivetrain', 'vehicle_class', 'fuel_type',
       'fuel_barrels_year', 'city_mpg', 'highway_mpg', 'combined_mpg',
       'co2_emission_grams_mile', 'fuel_cost_year'],
      dtype='object')

In [51]:
data[['fuel_cost_year', 'highway_mpg','make', 'model', 'year', 'engine_displacement', 'cylinders',
       'transmission', 'drivetrain', 'vehicle_class', 'fuel_type',
       'fuel_barrels_year', 'city_mpg', 'combined_mpg',
       'co2_emission_grams_mile']]

Unnamed: 0,fuel_cost_year,highway_mpg,make,model,year,engine_displacement,cylinders,transmission,drivetrain,vehicle_class,fuel_type,fuel_barrels_year,city_mpg,combined_mpg,co2_emission_grams_mile
0,1950,17,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,522.764706
1,2550,13,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,683.615385
2,2100,17,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,16,555.437500
3,2550,13,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,683.615385
4,2550,21,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,16,555.437500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,1100,38,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,36,244.000000
35948,1100,38,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,36,243.000000
35949,1100,38,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,36,244.000000
35950,1100,39,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,36,246.000000


In [49]:
data

Unnamed: 0,fuel_cost_year,make,model,year,engine_displacement,cylinders,transmission,drivetrain,vehicle_class,fuel_type,fuel_barrels_year,city_mpg,highway_mpg,combined_mpg,co2_emission_grams_mile
0,1950,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706
1,2550,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385
2,2100,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500
3,2550,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385
4,2550,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,1100,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000
35948,1100,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000
35949,1100,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000
35950,1100,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000


In [54]:
data[['model','make']]

Unnamed: 0,model,make
0,DJ Po Vehicle 2WD,AM General
1,FJ8c Post Office,AM General
2,Post Office DJ5 2WD,AM General
3,Post Office DJ8 2WD,AM General
4,GNX,ASC Incorporated
...,...,...
35947,fortwo coupe,smart
35948,fortwo coupe,smart
35949,fortwo coupe,smart
35950,fortwo coupe,smart


In [53]:
data.loc[:, 'model','make'] # WRONG - not a list, you passed a string, string - not a list.

Unnamed: 0,model,make
0,DJ Po Vehicle 2WD,AM General
1,FJ8c Post Office,AM General
2,Post Office DJ5 2WD,AM General
3,Post Office DJ8 2WD,AM General
4,GNX,ASC Incorporated
...,...,...
35947,fortwo coupe,smart
35948,fortwo coupe,smart
35949,fortwo coupe,smart
35950,fortwo coupe,smart


How can I get the `fuel cost/year` variable and put it at the beginning of the dataframe

In [55]:
data.columns

Index(['fuel_cost_year', 'make', 'model', 'year', 'engine_displacement',
       'cylinders', 'transmission', 'drivetrain', 'vehicle_class', 'fuel_type',
       'fuel_barrels_year', 'city_mpg', 'highway_mpg', 'combined_mpg',
       'co2_emission_grams_mile'],
      dtype='object')

In [56]:
column_order = ['co2_emission_grams_mile','fuel_cost_year', 'make', 'model', 'year', 'engine_displacement',
       'cylinders', 'transmission', 'drivetrain', 'vehicle_class', 'fuel_type',
       'fuel_barrels_year', 'city_mpg', 'highway_mpg', 'combined_mpg']

data = data.loc[:, column_order]

In [57]:
data

Unnamed: 0,co2_emission_grams_mile,fuel_cost_year,make,model,year,engine_displacement,cylinders,transmission,drivetrain,vehicle_class,fuel_type,fuel_barrels_year,city_mpg,highway_mpg,combined_mpg
0,522.764706,1950,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17
1,683.615385,2550,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13
2,555.437500,2100,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16
3,683.615385,2550,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13
4,555.437500,2550,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,244.000000,1100,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36
35948,243.000000,1100,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36
35949,244.000000,1100,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36
35950,246.000000,1100,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36


In [59]:
# problems you may handle

# auto-assign a subset of the dataframe
data = data['make']

0              AM General
1              AM General
2              AM General
3              AM General
4        ASC Incorporated
               ...       
35947               smart
35948               smart
35949               smart
35950               smart
35951               smart
Name: make, Length: 35952, dtype: object

In [None]:
data.head(2)

In [72]:
data = pd.read_csv('data/vehicles.csv')

In [73]:
data

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


## Remove column (or row)

- The `.drop()` method
- By default, `.drop()` drops a row given its index.

In [92]:
data = pd.read_csv('data/vehicles.csv')

In [61]:
data

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [64]:
data.drop('Year')

KeyError: "['Year'] not found in axis"

In [85]:
data.drop('Year', axis=1)

Unnamed: 0,Make,Model,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [66]:
data.drop(1)

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
5,Acura,2.2CL/3.0CL,1997,2.2,4.0,Automatic 4-spd,Front-Wheel Drive,Subcompact Cars,Regular,14.982273,20,26,22,403.954545,1500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [69]:
data.drop(1).reset_index(drop=True)

TypeError: reset_index() got an unexpected keyword argument 'index'

## Deep vs Shallow copy on pandas

In [93]:
data_bkp = data

In [91]:
data_bkp

Unnamed: 0,manufacturer,model_1,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [94]:
data.rename({'Make':'manufacturer'}, axis=1, inplace=True)

In [95]:
data_bkp is data

True

In [96]:
data = data.rename({'Model':'model_1'}, axis=1)

In [97]:
data_bkp is data

False

In [83]:
data_bkp.head()

Unnamed: 0,manufacturer,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.4375,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.4375,2550


In [74]:
data_bkp

Unnamed: 0,manufacturer,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [87]:
x=257
y=257
print(x,y)

257 257


In [89]:
x is y

True

In [115]:
data_bkp is data

True

In [100]:
data_bkp.head(2)

Unnamed: 0,manufacturer,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550


In [101]:
data_bkp = data.copy()

In [139]:
data = data.rename({'Model':'model'}, axis=1)

In [127]:
data.head(2)

Unnamed: 0,manufacturer,model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550


In [13]:
data_bkp.head(2)

NameError: name 'data_bkp' is not defined

In [133]:
data_bkp = data

In [134]:
data_bkp is data

True

In [135]:
data_bkp = data.copy()

In [98]:
data_bkp is data

False

In [103]:
data_bkp == data

manufacturer               35952
model_1                    35952
Year                       35952
Engine Displacement        35952
Cylinders                  35952
Transmission               35952
Drivetrain                 35952
Vehicle Class              35952
Fuel Type                  35952
Fuel Barrels/Year          35952
City MPG                   35952
Highway MPG                35952
Combined MPG               35952
CO2 Emission Grams/Mile    35952
Fuel Cost/Year             35952
dtype: int64

# Filter records
>    - `mask` concept
>    - `.query()` method

This is really important for data wrangling.

In [104]:
data = pd.read_csv('data/vehicles.csv')

In [105]:
data.head(2)

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550


## Simple Example: Starting with a numpy array. How can I filter the values of a list?

In [106]:
import numpy as np

In [122]:
my_array = np.array([1,2,3,4,5,6,7,8,9,10])

In [108]:
my_array

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [147]:
my_array * 10

array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

In [148]:
my_array > 5

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

The results of `my_array > 5` is what is called **a mask**. A result containing the `True` and `False` results of an operation. 

In [149]:
my_array[5:]

array([ 6,  7,  8,  9, 10])

In [110]:
my_array[ [False, False, False, False, False,  True,  True,  True,  True, True] ]

array([ 6,  7,  8,  9, 10])

In [158]:
my_array[my_array>5]

array([ 6,  7,  8,  9, 10])

Masks can be used as an index to select data!

After selecting, you can do anything with it, for example assigning it. This operation is called a `vectorial` operation. It is done all at once.

In [124]:
my_array[my_array > 5] = 1000

In [125]:
my_array

array([   1,    2,    3,    4,    5, 1000, 1000, 1000, 1000, 1000])

In [126]:
my_matrix = np.random.randint(0, 10, size=(5,5))
my_matrix

array([[4, 2, 5, 1, 8],
       [6, 5, 8, 9, 9],
       [8, 5, 7, 9, 6],
       [6, 6, 3, 4, 4],
       [5, 0, 8, 8, 6]])

In [127]:
my_matrix > 5

array([[False, False, False, False,  True],
       [ True, False,  True,  True,  True],
       [ True, False,  True,  True,  True],
       [ True,  True, False, False, False],
       [False, False,  True,  True,  True]])

In [128]:
my_matrix[ my_matrix > 5 ] = -99999

In [129]:
my_matrix

array([[     4,      2,      5,      1, -99999],
       [-99999,      5, -99999, -99999, -99999],
       [-99999,      5, -99999, -99999, -99999],
       [-99999, -99999,      3,      4,      4],
       [     5,      0, -99999, -99999, -99999]])

In [130]:
my_array[ my_array > 5 ] = 10

In [131]:
my_array

array([ 1,  2,  3,  4,  5, 10, 10, 10, 10, 10])

You can also save the condition

In [141]:
my_array = np.array([1,2,3,4,5,6,7,8,9,10])
my_array_2 = np.arange(0,10)

In [168]:
my_array

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [133]:
condition = my_array > 5 
condition

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [134]:
my_array

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [135]:
my_array[ condition ]

array([ 6,  7,  8,  9, 10])

In [142]:
my_array_2

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [140]:
condition

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [144]:
my_array_2[my_array_2>5]

array([6, 7, 8, 9])

## Bitwise logical operators - Combining conditions

To make more than one condition together, you can use 
- `&` - analogous to `and`
- `|` - analogous to `or` 

For example, get all numbers from my_array that are greater than 3 and smaller than 8

Let's do it in steps:
- get values greater than 3

In [145]:
my_array[my_array > 3]

array([ 4,  5,  6,  7,  8,  9, 10])

- get values smaller than 8

In [146]:
my_array[my_array < 8]

array([1, 2, 3, 4, 5, 6, 7])

- get values greater than 3 and smaller than 8

In [147]:
greater_than_3 = my_array > 3

In [148]:
smaller_than_8 = my_array < 8

In [175]:
greater_than_3 

array([False, False, False,  True,  True,  True,  True,  True,  True,
        True])

In [176]:
smaller_than_8 

array([ True,  True,  True,  True,  True,  True,  True, False, False,
       False])

In [179]:
(my_array > 3) & (my_array < 8)

array([False, False, False,  True,  True,  True,  True, False, False,
       False])

In [180]:
# (my_array > 3) or (my_array < 8)
(my_array > 3) | (my_array < 8)


array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [149]:
my_array > 3 & my_array < 8

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [151]:
greater_than_3 & smaller_than_8

array([False, False, False,  True,  True,  True,  True, False, False,
       False])

In [154]:
my_array[greater_than_3 & smaller_than_8]

array([4, 5, 6, 7])

In [152]:
my_array[(my_array > 3) & (my_array < 8)]

array([4, 5, 6, 7])

## Now in a dataframe

Let's find the rows in which the Cylinders values are exactly 6.

In [155]:
data

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [159]:
data['Cylinders']==6

0        False
1         True
2        False
3         True
4         True
         ...  
35947    False
35948    False
35949    False
35950    False
35951    False
Name: Cylinders, Length: 35952, dtype: bool

In [161]:
data[data['Cylinders']==6]

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
7,Acura,2.2CL/3.0CL,1997,3.0,6.0,Automatic 4-spd,Front-Wheel Drive,Subcompact Cars,Regular,16.480500,18,26,20,444.350000,1650
10,Acura,2.3CL/3.0CL,1998,3.0,6.0,Automatic 4-spd,Front-Wheel Drive,Subcompact Cars,Regular,16.480500,17,26,20,444.350000,1650
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35900,Wallace Environmental,Wetl 300 GE,1991,2.9,6.0,Automatic 4-spd,4-Wheel or All-Wheel Drive,Special Purpose Vehicles,Premium,27.467500,11,13,12,740.583333,3400
35901,Wallace Environmental,Wetl 300 SL,1991,2.9,6.0,Automatic 4-spd,Rear-Wheel Drive,Minicompact Cars,Premium,21.974000,15,16,15,592.466667,2700
35902,Wallace Environmental,Wetl 300 SL,1992,2.9,6.0,Automatic 4-spd,Rear-Wheel Drive,Minicompact Cars,Premium,21.974000,15,16,15,592.466667,2700
35903,Wallace Environmental,Wetl 300 TE,1991,2.9,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize-Large Station Wagons,Premium,21.974000,15,16,15,592.466667,2700


In [162]:
city_more_15 = data['City MPG']>15 

In [163]:
city_more_15

0         True
1        False
2         True
3        False
4        False
         ...  
35947     True
35948     True
35949     True
35950     True
35951     True
Name: City MPG, Length: 35952, dtype: bool

In [191]:
data[city_more_15]

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
5,Acura,2.2CL/3.0CL,1997,2.2,4.0,Automatic 4-spd,Front-Wheel Drive,Subcompact Cars,Regular,14.982273,20,26,22,403.954545,1500
6,Acura,2.2CL/3.0CL,1997,2.2,4.0,Manual 5-spd,Front-Wheel Drive,Subcompact Cars,Regular,13.733750,22,28,24,370.291667,1400
7,Acura,2.2CL/3.0CL,1997,3.0,6.0,Automatic 4-spd,Front-Wheel Drive,Subcompact Cars,Regular,16.480500,18,26,20,444.350000,1650
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [164]:
data.columns

Index(['Make', 'Model', 'Year', 'Engine Displacement', 'Cylinders',
       'Transmission', 'Drivetrain', 'Vehicle Class', 'Fuel Type',
       'Fuel Barrels/Year', 'City MPG', 'Highway MPG', 'Combined MPG',
       'CO2 Emission Grams/Mile', 'Fuel Cost/Year'],
      dtype='object')

In [165]:
fuel_col = [col.startswith('Fuel') for col in data.columns]

In [174]:
data.columns[fuel_col]

Index(['Fuel Type', 'Fuel Barrels/Year', 'Fuel Cost/Year'], dtype='object')

In [175]:
data[data.columns[fuel_col]]

Unnamed: 0,Fuel Type,Fuel Barrels/Year,Fuel Cost/Year
0,Regular,19.388824,1950
1,Regular,25.354615,2550
2,Regular,20.600625,2100
3,Regular,25.354615,2550
4,Premium,20.600625,2550
...,...,...,...
35947,Premium,9.155833,1100
35948,Premium,9.155833,1100
35949,Premium,9.155833,1100
35950,Premium,9.155833,1100


In [None]:
nome_dataframe.loc[linhas,colunas]

In [180]:
(data['Make']=='Chevrolet')

0        False
1        False
2        False
3        False
4        False
         ...  
35947    False
35948    False
35949    False
35950    False
35951    False
Name: Model, Length: 35952, dtype: bool

In [183]:
(data['Cylinders'] == 4) & (data['Make']=='Chevrolet')

0        False
1        False
2        False
3        False
4        False
         ...  
35947    False
35948    False
35949    False
35950    False
35951    False
Length: 35952, dtype: bool

In [184]:
data.loc[(data['Cylinders'] == 4) & (data['Make']=='Chevrolet'), fuel_col]

Unnamed: 0,Fuel Type,Fuel Barrels/Year,Fuel Cost/Year
4275,Regular,16.480500,1650
4279,Regular,16.480500,1650
4280,Regular,15.695714,1600
4281,Regular,14.982273,1500
4285,Regular,16.480500,1650
...,...,...,...
7913,Premium Gas or Electricity,3.092539,1100
7914,Premium Gas or Electricity,3.092539,1100
7915,Premium Gas or Electricity,3.092539,1100
7916,Regular Gas or Electricity,2.006844,800


### Example

In [185]:
# create a column with all zeroes named - 'fl_city_car'

data['fl_city_car'] = 0

In [200]:
data.columns

Index(['Make', 'Model', 'Year', 'Engine Displacement', 'Cylinders',
       'Transmission', 'Drivetrain', 'Vehicle Class', 'Fuel Type',
       'Fuel Barrels/Year', 'City MPG', 'Highway MPG', 'Combined MPG',
       'CO2 Emission Grams/Mile', 'Fuel Cost/Year'],
      dtype='object')

In [187]:
(data['City MPG']) > (data['Highway MPG'])

0         True
1        False
2        False
3        False
4        False
         ...  
35947    False
35948    False
35949    False
35950    False
35951    False
Length: 35952, dtype: bool

In [195]:
# assign 1 to 'fl_city_car' all cars that have 'City MPG' > 'Highway MPG'

data.loc[(data['City MPG']) > (data['Highway MPG']), 'fl_city_car_2'] = 1

In [196]:
data.columns

Index(['Make', 'Model', 'Year', 'Engine Displacement', 'Cylinders',
       'Transmission', 'Drivetrain', 'Vehicle Class', 'Fuel Type',
       'Fuel Barrels/Year', 'City MPG', 'Highway MPG', 'Combined MPG',
       'CO2 Emission Grams/Mile', 'Fuel Cost/Year', 'fl_city_car',
       'fl_city_car_2'],
      dtype='object')

In [199]:
data

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year,fl_city_car,fl_city_car_2
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950,1,1.0
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550,0,
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100,10,
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550,0,
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550,10,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100,10,
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100,10,
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100,10,
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100,10,


In [198]:
data.loc[(data['City MPG']) < (data['Highway MPG']), 'fl_city_car'] = 10

In [209]:
data.loc[(data['City MPG']) > (data['Highway MPG']), 'fl_city_car']

0        1.0
47       1.0
48       1.0
3069     1.0
3070     1.0
        ... 
33293    1.0
33294    1.0
33295    1.0
33374    1.0
33375    1.0
Name: fl_city_car, Length: 183, dtype: float64

## You can combine conditions

Cars from `Ford` and 6 `Cylinders`

In [None]:
data.loc[(data['Cylinders'] == 6) & (data['Make'] == 'Ford'), :]

In [None]:
# careful with:

data.loc[data['Make']=='Ford' & data['Cylinders']==6, :] # WRONG!!

## You can put the conditions in variables as well

In [None]:
condition1 = (data['Make']=='Ford')
condition2 = (data['Cylinders']==6)
condition3 = (data['Combined MPG'] < 18)

In [None]:
data.loc[condition1 & condition2 & condition3, :]

## Another way to do the same thing.

* using the method `query`

The method `query` receives a string in which you can say your condition. Important things:
- `.query()` is a method of your dataframe
- `.query()` method receives a string 
- Every word inside the string that is not `quoted` is considered a variable of your dataframe (so, for example `.query('Year == 1999')` will look for the variable `Year`. Another example: if you try to run `.query('Make == Ford')` will look both for the column name `Make` and the column named `Ford`. If you want the results of the column `Make` to match the **string** Ford, you have to run `.query('Make == "Ford"')`
- If your column has spaces, you have to call it using backticks like in **.query('\`Engine Displacement\` < 4')**:

In [210]:
data.query('Make == "Ford"')

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year,fl_city_car
11440,Ford,Aerostar Van,1986,2.3,4.0,Automatic 4-spd,Rear-Wheel Drive,Vans,Regular,17.347895,18,22,19,467.736842,1750,10.0
11441,Ford,Aerostar Van,1986,2.3,4.0,Manual 5-spd,Rear-Wheel Drive,Vans,Regular,13.733750,23,26,24,370.291667,1400,10.0
11442,Ford,Aerostar Van,1986,2.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Vans,Regular,19.388824,15,21,17,522.764706,1950,10.0
11443,Ford,Aerostar Van,1986,2.8,6.0,Manual 5-spd,Rear-Wheel Drive,Vans,Regular,18.311667,16,22,18,493.722222,1850,10.0
11444,Ford,Aerostar Van,1986,3.0,6.0,Manual 5-spd,Rear-Wheel Drive,Vans,Regular,17.347895,17,22,19,467.736842,1750,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14381,Ford,Windstar FWD Wagon,2000,3.0,6.0,Automatic 4-spd,Front-Wheel Drive,Minivan - 2WD,Regular,19.388824,15,21,17,522.764706,1950,10.0
14382,Ford,Windstar FWD Wagon,2000,3.8,6.0,Automatic 4-spd,Front-Wheel Drive,Minivan - 2WD,Regular,19.388824,15,21,17,522.764706,1950,10.0
14383,Ford,Windstar FWD Wagon,2001,3.8,6.0,Automatic 4-spd,Front-Wheel Drive,Minivan - 2WD,Regular,18.311667,16,22,18,493.722222,1850,10.0
14384,Ford,Windstar FWD Wagon,2002,3.8,6.0,Automatic 4-spd,Front-Wheel Drive,Minivan - 2WD,Regular,18.311667,16,21,18,493.722222,1850,10.0


In [201]:
data.query('Cylinders == 4 and Make == "Ford"')

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year,fl_city_car,fl_city_car_2
11440,Ford,Aerostar Van,1986,2.3,4.0,Automatic 4-spd,Rear-Wheel Drive,Vans,Regular,17.347895,18,22,19,467.736842,1750,10,
11441,Ford,Aerostar Van,1986,2.3,4.0,Manual 5-spd,Rear-Wheel Drive,Vans,Regular,13.733750,23,26,24,370.291667,1400,10,
11446,Ford,Aerostar Van,1987,2.3,4.0,Automatic 4-spd,Rear-Wheel Drive,Vans,Regular,16.480500,18,24,20,444.350000,1650,10,
11447,Ford,Aerostar Van,1987,2.3,4.0,Manual 5-spd,Rear-Wheel Drive,Vans,Regular,13.733750,23,26,24,370.291667,1400,10,
11477,Ford,Aerostar Wagon,1986,2.3,4.0,Manual 5-spd,Rear-Wheel Drive,Vans,Regular,14.982273,20,25,22,403.954545,1500,10,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14345,Ford,Transit Connect Wagon LWB FFV,2017,2.5,4.0,Automatic (S6),Front-Wheel Drive,Special Purpose Vehicle 2WD,Gasoline or E85,14.982273,19,27,22,407.000000,1500,10,
14346,Ford,Transit Connect Wagon LWB FWD,2014,2.5,4.0,Automatic (S6),Front-Wheel Drive,Special Purpose Vehicle 2WD,Regular,14.330870,20,28,23,391.000000,1450,10,
14347,Ford,Transit Connect Wagon LWB FWD,2015,2.5,4.0,Automatic (S6),Front-Wheel Drive,Special Purpose Vehicle 2WD,Regular,14.330870,20,28,23,391.000000,1450,10,
14348,Ford,Transit Connect Wagon LWB FWD,2016,2.5,4.0,Automatic (S6),Front-Wheel Drive,Special Purpose Vehicle 2WD,Regular,14.982273,19,27,22,405.000000,1500,10,


In [204]:
data.query('`City MPG` > `Highway MPG`')

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year,fl_city_car,fl_city_car_2
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950,1,1.0
47,Acura,ILX Hybrid,2013,1.5,4.0,Auto(AV-S7),Front-Wheel Drive,Compact Cars,Premium,8.673947,39,38,38,228.000000,1050,1,1.0
48,Acura,ILX Hybrid,2014,1.5,4.0,Auto(AV-S7),Front-Wheel Drive,Compact Cars,Premium,8.673947,39,38,38,228.000000,1050,1,1.0
3069,BMW,i3 REX,2014,0.6,2.0,Auto(A1),Rear-Wheel Drive,Subcompact Cars,Premium Gas or Electricity,1.563190,41,37,39,40.000000,1050,1,1.0
3070,BMW,i3 REX,2015,0.6,2.0,Automatic (A1),Rear-Wheel Drive,Subcompact Cars,Premium Gas or Electricity,1.563190,41,37,39,40.000000,1050,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33293,Toyota,Prius v,2015,1.8,4.0,Automatic (variable gear ratios),Front-Wheel Drive,Midsize Station Wagons,Regular,7.847857,44,40,42,211.000000,800,1,1.0
33294,Toyota,Prius v,2016,1.8,4.0,Automatic (variable gear ratios),Front-Wheel Drive,Midsize Station Wagons,Regular,7.847857,44,40,42,211.000000,800,1,1.0
33295,Toyota,Prius v,2017,1.8,4.0,Automatic (variable gear ratios),Front-Wheel Drive,Midsize Station Wagons,Regular,8.039268,43,39,41,217.000000,800,1,1.0
33374,Toyota,RAV4 Hybrid AWD,2016,2.5,4.0,Auto(AV-S6),All-Wheel Drive,Small Sport Utility Vehicle 4WD,Regular,9.988182,34,31,33,270.000000,1000,1,1.0


In [None]:
data.query('Cylinders == 4')

In [200]:
numero_cilindros = 6
data.query(f'Make == "Acura" and Cylinders == {numero_cilindros}')

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year,fl_city_car,fl_city_car_2
7,Acura,2.2CL/3.0CL,1997,3.0,6.0,Automatic 4-spd,Front-Wheel Drive,Subcompact Cars,Regular,16.480500,18,26,20,444.350000,1650,10,
10,Acura,2.3CL/3.0CL,1998,3.0,6.0,Automatic 4-spd,Front-Wheel Drive,Subcompact Cars,Regular,16.480500,17,26,20,444.350000,1650,10,
13,Acura,2.3CL/3.0CL,1999,3.0,6.0,Automatic 4-spd,Front-Wheel Drive,Subcompact Cars,Regular,16.480500,17,26,20,444.350000,1650,10,
16,Acura,2.5TL/3.2TL,1996,3.2,6.0,Automatic 4-spd,Front-Wheel Drive,Compact Cars,Premium,17.347895,17,22,19,467.736842,2150,10,
18,Acura,2.5TL/3.2TL,1997,3.2,6.0,Automatic 4-spd,Front-Wheel Drive,Compact Cars,Premium,17.347895,17,22,19,467.736842,2150,10,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,Acura,TSX,2014,3.5,6.0,Automatic (S5),Front-Wheel Drive,Compact Cars,Premium,14.330870,19,28,23,392.000000,1750,10,
303,Acura,ZDX 4WD,2010,3.7,6.0,Automatic (S6),All-Wheel Drive,Sport Utility Vehicle - 4WD,Premium,17.347895,16,23,19,467.736842,2150,10,
304,Acura,ZDX 4WD,2011,3.7,6.0,Automatic (S6),All-Wheel Drive,Sport Utility Vehicle - 4WD,Premium,17.347895,16,23,19,467.736842,2150,10,
305,Acura,ZDX 4WD,2012,3.7,6.0,Automatic (S6),All-Wheel Drive,Sport Utility Vehicle - 4WD,Premium,17.347895,16,23,19,467.736842,2150,10,


In [None]:
data.query('`City MPG` > `Highway MPG`')

In [None]:
numero_cilindros = 4
data.query(f'Make == "Acura" and Cylinders == {numero_cilindros}')

In [10]:
table_1 = np.random.choice([0,25,50,75,100],size=(3,5))
table_2 = np.random.choice(['   '],size=(3,5))



In [21]:
print(table_1)
table_2[table_1 == 100] = "max"
table_2[table_1 == 0] = "min"
table_2[(table_1 != 100) & (table_1 != 0)] = "med"




[[ 50  50  25 100  25]
 [ 75  50   0   0  50]
 [ 50  75  75  25 100]]


In [19]:
table_2


array([['med', 'med', 'med', 'max', 'med'],
       ['med', 'med', 'min', 'min', 'med'],
       ['med', 'med', 'med', 'med', 'max']], dtype='<U3')

In [22]:
np.where(table_1<25, 'Min',np.where(table_1>75,'Max','Media'))

array([['Media', 'Media', 'Media', 'Max', 'Media'],
       ['Media', 'Media', 'Min', 'Min', 'Media'],
       ['Media', 'Media', 'Media', 'Media', 'Max']], dtype='<U5')