In [1]:
'''What is a DataFrame?
A Pandas DataFrame is a two-dimensional data structure, like a two-dimensional array or a table with rows and columns.
'''
# Build a small DataFrame from a dict that maps column names to column values.
import pandas as pd

data = dict(
    calories=[420, 380, 390],
    duration=[50, 40, 45],
)
# Each dict key becomes a column; the rows get the default 0, 1, 2 index.
df = pd.DataFrame(data)
print(df)

   calories  duration
0       420        50
1       380        40
2       390        45


In [2]:
'''Locate Row
As the result above shows, a DataFrame is like a table with rows and columns.

Pandas uses the loc attribute to return one or more specified rows.

Example
Return row 0:'''

# Select a single row by its index label.
first_row = df.loc[0]
# Note: a single label yields a Pandas Series (one entry per column).
print(first_row)

calories    420
duration     50
Name: 0, dtype: int64


In [3]:
# Return rows 0 and 1:
# pass a list of index labels to loc.
wanted_rows = [0, 1]
print(df.loc[wanted_rows])
# Note: when loc is given a list of labels, the result is a Pandas DataFrame.

   calories  duration
0       420        50
1       380        40


In [4]:
'''Named Indexes
With the index argument, you can give the rows your own labels.

Example
Pass a list of names so that each row gets a name:'''

import pandas as pd

data = {"calories": [420, 380, 390], "duration": [50, 40, 45]}
day_labels = ["day1", "day2", "day3"]

# One label per row, in the same order as the values in each column.
df = pd.DataFrame(data=data, index=day_labels)
print(df)

      calories  duration
day1       420        50
day2       380        40
day3       390        45


In [5]:
'''Locate Named Indexes
Use a named index with the loc attribute to return the specified row(s).

Example
Return "day2":'''
# Look the row up by its name; a single label gives back a Series.
day2_row = df.loc["day2"]
print(day2_row)

calories    380
duration     40
Name: day2, dtype: int64


In [6]:
'''Load Files Into a DataFrame
If your data set is stored in a file, Pandas can load it into a DataFrame.

Example
Load a comma-separated values (CSV) file into a DataFrame:'''
import pandas as pd

# The path is relative, so data.csv is looked up in the current working directory.
csv_path = 'data.csv'
df = pd.read_csv(csv_path)
print(df)

    Index      Customer Id First Name Last Name  \
0       1  DD37Cf93aecA6Dc     Sheryl    Baxter   
1       2  1Ef7b82A4CAAD10    Preston    Lozano   
2       3  6F94879bDAfE5a6        Roy     Berry   
3       4  5Cef8BFA16c5e3c      Linda     Olsen   
4       5  053d585Ab6b3159     Joanna    Bender   
..    ...              ...        ...       ...   
95     96  cb8E23e48d22Eae       Karl     Greer   
96     97  CeD220bdAaCfaDf       Lynn  Atkinson   
97     98  28CDbC0dFe4b1Db       Fred    Guerra   
98     99  c23d1D9EE8DEB0A     Yvonne    Farmer   
99    100  2354a0E336A91A1   Clarence    Haynes   

                            Company               City  \
0                   Rasmussen Group       East Leonard   
1                       Vega-Gentry  East Jimmychester   
2                     Murillo-Perry      Isabelborough   
3   Dominguez, Mcmillan and Donovan         Bensonview   
4          Martin, Lang and Andrade     West Priscilla   
..                              ...    

In [7]:
#Pandas Read CSV
'''Read CSV Files
A simple way to store big data sets is to use CSV files (comma-separated values files).

CSV files contain plain text and are a well-known format that can be read by virtually any tool, including Pandas.

In our examples we will be using a CSV file called 'data.csv'.

The file is read with a relative path, so data.csv has to be present in the working directory.'''
# Load the CSV into a DataFrame, then print every row and column.
import pandas as pd

df = pd.read_csv('data.csv')
# to_string() renders the whole DataFrame as one string, without shortening it.
full_table = df.to_string()
print(full_table)

    Index      Customer Id  First Name    Last Name                          Company                 City                                       Country                 Phone 1                 Phone 2                               Email Subscription Date                            Website
0       1  DD37Cf93aecA6Dc      Sheryl       Baxter                  Rasmussen Group         East Leonard                                         Chile            229.077.5154        397.884.0519x718            zunigavanessa@smith.info        2020-08-24         http://www.stephenson.com/
1       2  1Ef7b82A4CAAD10     Preston       Lozano                      Vega-Gentry    East Jimmychester                                      Djibouti              5153435776        686-620-1820x944                     vmata@colon.com        2021-04-23              http://www.hobbs.com/
2       3  6F94879bDAfE5a6         Roy        Berry                    Murillo-Perry        Isabelborough                           A

In [8]:
#Tip: use to_string() to print the entire DataFrame.
'''If you have a large DataFrame with many rows, Pandas will only print the first 5 rows and the last 5 rows:

Example
Print the DataFrame without the to_string() method:
'''
import pandas as pd

df = pd.read_csv(filepath_or_buffer='data.csv')
# Printing the DataFrame directly gives the shortened view.
print(df)

    Index      Customer Id First Name Last Name  \
0       1  DD37Cf93aecA6Dc     Sheryl    Baxter   
1       2  1Ef7b82A4CAAD10    Preston    Lozano   
2       3  6F94879bDAfE5a6        Roy     Berry   
3       4  5Cef8BFA16c5e3c      Linda     Olsen   
4       5  053d585Ab6b3159     Joanna    Bender   
..    ...              ...        ...       ...   
95     96  cb8E23e48d22Eae       Karl     Greer   
96     97  CeD220bdAaCfaDf       Lynn  Atkinson   
97     98  28CDbC0dFe4b1Db       Fred    Guerra   
98     99  c23d1D9EE8DEB0A     Yvonne    Farmer   
99    100  2354a0E336A91A1   Clarence    Haynes   

                            Company               City  \
0                   Rasmussen Group       East Leonard   
1                       Vega-Gentry  East Jimmychester   
2                     Murillo-Perry      Isabelborough   
3   Dominguez, Mcmillan and Donovan         Bensonview   
4          Martin, Lang and Andrade     West Priscilla   
..                              ...    

In [9]:
'''max_rows
The number of rows printed is defined in the Pandas option settings.

You can check your system's maximum number of rows with pd.options.display.max_rows.

Example
Check the maximum number of printed rows:'''
import pandas as pd

# get_option reads the same setting as the pd.options.display.max_rows attribute.
row_limit = pd.get_option("display.max_rows")
print(row_limit)

60


In [10]:
'''On my system the number is 60, which means that if the DataFrame contains more than 60 rows, the print(df) statement will show only the headers and the first and last 5 rows.

You can change the maximum number of rows through the same setting.

Example
Increase the maximum number of rows to display the entire DataFrame:
'''
import pandas as pd

# Raise the limit before printing; this changes the global pandas display option.
pd.set_option("display.max_rows", 9999)
df = pd.read_csv('data.csv')
print(df)

    Index      Customer Id  First Name    Last Name  \
0       1  DD37Cf93aecA6Dc      Sheryl       Baxter   
1       2  1Ef7b82A4CAAD10     Preston       Lozano   
2       3  6F94879bDAfE5a6         Roy        Berry   
3       4  5Cef8BFA16c5e3c       Linda        Olsen   
4       5  053d585Ab6b3159      Joanna       Bender   
5       6  2d08FB17EE273F4       Aimee        Downs   
6       7  EA4d384DfDbBf77      Darren         Peck   
7       8  0e04AFde9f225dE       Brett       Mullen   
8       9  C2dE4dEEc489ae0      Sheryl       Meyers   
9      10  8C2811a503C7c5a    Michelle    Gallagher   
10     11  216E205d6eBb815        Carl    Schroeder   
11     12  CEDec94deE6d69B       Jenna       Dodson   
12     13  e35426EbDEceaFF      Tracey         Mata   
13     14  A08A8aF8BE9FaD4    Kristine          Cox   
14     15  6fEaA1b7cab7B6C       Faith         Lutz   
15     16  8cad0b4CBceaeec     Miranda      Beasley   
16     17  a5DC21AE3a21eaA    Caroline        Foley   
17     18 