# PyDataset

https://github.com/iamaziz/PyDataset

In [1]:
# One-time setup: uncomment to install the dataset package.
# In a notebook, `%pip install pydataset` is preferable to `!pip ...`
# because it targets the running kernel's environment.
# !pip install pydataset

In [2]:
import pandas as pd
import numpy as np

from pydataset import data

In [3]:
# Calling data() with no arguments returns the catalog of available datasets
# (columns: dataset_id, title). Sort it by id and preview the first rows.
data().sort_values('dataset_id').head()

Unnamed: 0,dataset_id,title
183,AMSsurvey,American Math Society Survey Data
261,Accident,Ship Accidents
184,Adler,Experimenter Expectations
429,Aids2,Australian AIDS Survival Data
0,AirPassengers,Monthly Airline Passenger Numbers 1949-1960


In [4]:
# Display the bundled documentation for the Cars93 dataset
# (column descriptions, adopted from the R documentation).
data('Cars93', show_doc=True)

Cars93

PyDataset Documentation (adopted from R Documentation. The displayed examples are in R)

##  Data from 93 Cars on Sale in the USA in 1993

### Description

The `Cars93` data frame has 93 rows and 27 columns.

### Usage

    Cars93

### Format

This data frame contains the following columns:

`Manufacturer`

Manufacturer.

`Model`

Model.

`Type`

Type: a factor with levels `"Small"`, `"Sporty"`, `"Compact"`, `"Midsize"`,
`"Large"` and `"Van"`.

`Min.Price`

Minimum Price (in \$1,000): price for a basic version.

`Price`

Midrange Price (in \$1,000): average of `Min.Price` and `Max.Price`.

`Max.Price`

Maximum Price (in \$1,000): price for “a premium version”.

`MPG.city`

City MPG (miles per US gallon by EPA rating).

`MPG.highway`

Highway MPG.

`AirBags`

Air Bags standard. Factor: none, driver only, or driver & passenger.

`DriveTrain`

Drive train type: rear wheel, front wheel or 4WD; (factor).

`Cylinders`

Number of cylinders (missing for Mazda RX-7, which has a rotary e

In [5]:
# Load the Cars93 dataset into a DataFrame (one row per car model).
df = data('Cars93')

In [6]:
# Preview the first five rows to check the columns and value formats.
df.head()

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin,Make
1,Acura,Integra,Small,12.9,15.9,18.8,25,31,,Front,...,5,177,102,68,37,26.5,11.0,2705,non-USA,Acura Integra
2,Acura,Legend,Midsize,29.2,33.9,38.7,18,25,Driver & Passenger,Front,...,5,195,115,71,38,30.0,15.0,3560,non-USA,Acura Legend
3,Audi,90,Compact,25.9,29.1,32.3,20,26,Driver only,Front,...,5,180,102,67,37,28.0,14.0,3375,non-USA,Audi 90
4,Audi,100,Midsize,30.8,37.7,44.6,19,26,Driver & Passenger,Front,...,6,193,106,70,37,31.0,17.0,3405,non-USA,Audi 100
5,BMW,535i,Midsize,23.7,30.0,36.2,22,30,Driver only,Rear,...,4,186,109,69,39,27.0,13.0,3640,non-USA,BMW 535i


# Value Counts

In [7]:
# Count rows (car models) per manufacturer, most frequent first.
# NOTE(review): the output contains a 'Chrylser' label, which appears to be a
# misspelling of 'Chrysler' in the raw data, so that maker's count is split
# across two labels.
df['Manufacturer'].value_counts()

Ford             8
Chevrolet        8
Dodge            6
Pontiac          5
Mazda            5
Oldsmobile       4
Nissan           4
Volkswagen       4
Hyundai          4
Toyota           4
Buick            4
Honda            3
Subaru           3
Cadillac         2
Chrysler         2
Lincoln          2
Mercury          2
Mitsubishi       2
Acura            2
Audi             2
Eagle            2
Lexus            2
Geo              2
Mercedes-Benz    2
Volvo            2
Infiniti         1
Plymouth         1
Saturn           1
BMW              1
Chrylser         1
Suzuki           1
Saab             1
Name: Manufacturer, dtype: int64

# Is In

`Passengers`

Passenger capacity (persons)

In [8]:
# Distinct passenger capacities present in the data, in ascending order.
df['Passengers'].sort_values().unique()

array([2, 4, 5, 6, 7, 8])

In [9]:
# Passenger capacities we are interested in; used below with Series.isin().
# NOTE(review): the name is misspelled ("neeeded"); it is kept as-is because
# later cells reference it under this spelling.
seats_neeeded = [4, 5]

In [10]:
# Among cars whose passenger capacity is in seats_neeeded, count the models
# per manufacturer. Rows and column are selected in one .loc[rows, column]
# lookup instead of the chained df[mask]['col'] form, which builds an
# intermediate filtered frame first.
df.loc[df['Passengers'].isin(seats_neeeded), 'Manufacturer'].value_counts()

Ford             6
Pontiac          4
Hyundai          4
Subaru           3
Volkswagen       3
Chevrolet        3
Nissan           3
Mazda            3
Dodge            3
Toyota           3
Honda            3
Mercury          2
Mitsubishi       2
Acura            2
Mercedes-Benz    2
Oldsmobile       2
Volvo            2
Geo              2
Lexus            2
Suzuki           1
Buick            1
Saab             1
Audi             1
Plymouth         1
Cadillac         1
BMW              1
Infiniti         1
Saturn           1
Eagle            1
Name: Manufacturer, dtype: int64

# Where

In [11]:
# Label every car with np.where(condition, value_if_true, value_if_false):
# 'Consider' when its passenger capacity is in seats_neeeded, otherwise
# 'Not For Me'. This adds a new 'Consideration' column to df in place.
df['Consideration'] = np.where(df['Passengers'].isin(seats_neeeded), 'Consider', 'Not For Me')

In [12]:
# Tally how many cars fall under each label of the new column.
df['Consideration'].value_counts()

Consider      64
Not For Me    29
Name: Consideration, dtype: int64

# Bonus - Jupyter Magic

@python_tip - https://twitter.com/python_tip

tweet - https://twitter.com/python_tip/status/1101511420141846528

In [13]:
# IPython line magic: list every variable in the interactive namespace
# along with its type and a short summary of its contents.
%whos

Variable        Type         Data/Info
--------------------------------------
data            function     <function data at 0x11536bae8>
df              DataFrame         Manufacturer        <...>n\n[93 rows x 28 columns]
np              module       <module 'numpy' from '/Us<...>kages/numpy/__init__.py'>
pd              module       <module 'pandas' from '/U<...>ages/pandas/__init__.py'>
seats_neeeded   list         n=2
