# Pandas - DataFrames

DataFrames are an extremely powerful tool and a natural extension of the Pandas Series.<br>
A Pandas DataFrame consists of multiple Pandas Series that share index values.

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Fix the seed so the same "random" values are produced on every run.
np.random.seed(101)
# 4 rows x 3 columns of integers from 0 to 100 (the upper bound 101 is exclusive).
mydata = np.random.randint(low=0, high=101, size=(4, 3))

In [None]:
mydata  # show the generated 4x3 array

In [None]:
myindex = ['CA', 'NY', 'AZ', 'TX']  # row labels: one per row of mydata (4 rows)

In [None]:
mycolumns = ['Jan', 'Feb', 'March']  # column labels: one per column of mydata (3 columns)

In [None]:
df = pd.DataFrame(mydata)  # no index/columns passed, so pandas assigns default integer labels

In [None]:
df # With no labels supplied, rows and columns are numbered 0, 1, 2, ... until we define them.

In [None]:
# Keyword arguments make it explicit which list labels the rows and which the columns.
# Positional equivalent: pd.DataFrame(mydata, myindex, mycolumns)
df = pd.DataFrame(data=mydata, index=myindex, columns=mycolumns)

In [None]:
df  # rows now carry the labels from myindex, columns those from mycolumns

In [None]:
df.info()  # prints the index, each column's non-null count and dtype, and memory usage

#### **To Read a CSV File**

In [None]:
# Load tips.csv into a DataFrame; this rebinds df, replacing the small demo DataFrame.
# The path is relative to the working directory - adjust it to wherever tips.csv lives.
df = pd.read_csv("../input/tipscsv/tips.csv")

In [None]:
df  # display the data loaded from the CSV file

In [None]:
df.columns # Lists the column labels of the DataFrame

In [None]:
df.index  # the row labels (the index) of the DataFrame

In [None]:
df.head(10) # Returns the first 10 rows (head() with no argument returns the first 5)

In [None]:
df.tail()  # returns the last 5 rows by default

In [None]:
df.info()  # column dtypes and non-null counts for the loaded data

In [None]:
df.describe() # Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns

In [None]:
df.describe().transpose()  # same summary, flipped: one row per numeric column, one column per statistic

#### **Reading & Working with Columns, Operations on Columns**

In [None]:
df.head()  # quick look at the available columns before selecting any

In [None]:
df['total_bill'] # Select a single column by its label

In [None]:
type(df['total_bill'])  # a single column comes back as a pandas Series

In [None]:
col = ['total_bill', 'tip']  # list of column labels to select
df[col] # Same as df[['total_bill', 'tip']] - note the double brackets when written inline

In [None]:
type(df[col])  # selecting with a list of labels returns a DataFrame, not a Series

* **To Create a new Column**

In [None]:
100 * df['tip'] / df['total_bill'] # Element-wise arithmetic, computed row by row; the result is shown but not stored

In [None]:
df['tip_percentage'] = 100 * df['tip'] / df['total_bill'] # Assigning to a new label adds 'tip_percentage' as a column of df

In [None]:
df.head()  # 'tip_percentage' now appears as a new column

In [None]:
df.info()  # the new column is included in the column listing

In [None]:
# total_bill divided by size, rounded to 2 decimal places.
# 'size' must be accessed with brackets: df.size is the DataFrame attribute
# that returns the number of elements, not the column.
# Assigning to a label that already exists would overwrite that column's
# values in every row instead of adding a new column.
df['price_per_person'] = (df['total_bill'] / df['size']).round(2)

In [None]:
df.head()  # 'price_per_person' has been added

In [None]:
df.drop('tip_percentage', axis = 1) # axis = 1 drops a column (axis = 0 would drop a row); returns a new DataFrame

In [None]:
df # The drop above returned a new DataFrame; df itself still has 'tip_percentage'
# To change df itself, pass 'inplace = True' or assign the result back to df

In [None]:
df.drop('tip_percentage', axis = 1, inplace = True) # Modifies df directly and returns None
# Recommended alternative: re-assign the result instead of using inplace
# df = df.drop('tip_percentage', axis = 1)

In [None]:
df # 'tip_percentage' has now been removed from df itself

In [None]:
df.shape  # tuple of (number of rows, number of columns)

`df.shape` returns a tuple `(rows, columns)`.<br>
Position 0 of the tuple is the number of rows (axis 0); position 1 is the number of columns (axis 1).

#### **Reading & Working with Rows, Operations on Rows**

In [None]:
df.head()  # note the 'Payment ID' column, which is used as the index below

In [None]:
df.index  # the index has not been changed since the CSV was loaded

In [None]:
df.set_index('Payment ID') # Returns a new DataFrame; df itself is unchanged
# This uses the 'Payment ID' column as the row labels (the index) of the DataFrame.
# Payment ID is unique for every row here, so it works well as an index.
# The index also takes the name 'Payment ID'.

In [None]:
df = df.set_index('Payment ID') # Assign the result back to df to keep the new index

In [None]:
df.head()  # 'Payment ID' is now the index rather than a regular column

In [None]:
df = df.reset_index() # Restores the default integer index; 'Payment ID' becomes a regular column again

In [None]:
df.head()  # back to integer row labels, with 'Payment ID' as a column

In [None]:
df = df.set_index('Payment ID')  # use 'Payment ID' as the index again for the row examples below

In [None]:
df.head()  # rows are labelled by 'Payment ID' again

* **To Grab a Row(s)**

In [None]:
df.iloc[0] # Select a row by integer position (0 is the first row)

In [None]:
df.loc['Sun2959'] # Select a row by its index label

In [None]:
df.iloc[0:4] # Rows at positions 0 to 3 (the end of an iloc slice is exclusive)

In [None]:
df.loc[['Sun2959', 'Sun4608', 'Sun5260']] # Select several rows by label by passing a list of labels

* **To Delete or Add a Row(s)**

In [None]:
df = df.drop('Sun2959', axis = 0) # drop() matches index labels, so pass the row's label; with a labelled index, an integer position is not a valid label and raises an error

In [None]:
df  # the row labelled 'Sun2959' has been removed

In [None]:
one_row = df.iloc[0]  # first remaining row, returned as a Series

In [None]:
one_row  # the row's values, labelled by column name

In [None]:
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the row is added with pd.concat() instead.
# - to_frame().T turns the Series into a one-row DataFrame.
# - infer_objects() restores per-column dtypes so numeric columns are not
#   upcast to object when the frames are combined.
# - rename_axis() gives the new row's index the same name as df's index, so
#   the index name is kept after concatenation.
new_row = one_row.to_frame().T.infer_objects().rename_axis(df.index.name)
df = pd.concat([df, new_row]) # Adds the row at the end of the dataframe

In [None]:
df  # the added row appears at the bottom