# Creating a dataframe from an array

In [72]:
import pandas as pd
import numpy as np

## Option 1: from a NumPy array

In [73]:
# Build a 3x2 NumPy array that will supply the DataFrame's values
data1 = np.array(
    [
        [1, 3],
        [6, 9],
        [12, 15],
    ]
)
data1

array([[ 1,  3],
       [ 6,  9],
       [12, 15]])

In [74]:
# Wrap the array in a DataFrame, giving explicit labels to its rows and columns
df = pd.DataFrame(
    data=data1,
    index=["row1", "row2", "row3"],
    columns=["Col1", "Col2"],
)
df

Unnamed: 0,Col1,Col2
row1,1,3
row2,6,9
row3,12,15


## Option 2: from a nested list

In [75]:
# Build the 2-D data as a plain nested Python list (one inner list per row)
data = [
    [1, 3, 2],
    [6, 9, 7],
    [12, 15, 11],
]
data

[[1, 3, 2], [6, 9, 7], [12, 15, 11]]

In [76]:
# Turn the nested list into a DataFrame, giving explicit labels to its rows and columns
df = pd.DataFrame(
    data=data,
    index=["row1", "row2", "row3"],
    columns=["Col1", "Col2", "Col3"],
)
df

Unnamed: 0,Col1,Col2,Col3
row1,1,3,2
row2,6,9,7
row3,12,15,11


# Creating a DataFrame from a dictionary

In [77]:
# Two lists of equal length used as the example data
states = [
    "California",
    "New York",
    "Florida",
    "Texas",
]
population = [
    333,
    444,
    555,
    666,
]

In [78]:
# Collect both lists in one dictionary, keyed by the label each list should carry
dict_states = dict(States=states, Population=population)
dict_states


{'States': ['California', 'New York', 'Florida', 'Texas'],
 'Population': [333, 444, 555, 666]}

In [79]:
# Build the DataFrame from the dictionary, labelling the rows 1-4
# and stating the column order explicitly
df_population = pd.DataFrame(
    data=dict_states,
    index=[1, 2, 3, 4],
    columns=["States", "Population"],
)
df_population

Unnamed: 0,States,Population
1,California,333
...,...,...
4,Texas,666


# Creating a DataFrame from a CSV file

In [80]:
# Load the CSV file into a DataFrame (relative path, resolved against the working directory)
df_exam = pd.read_csv(filepath_or_buffer="StudentsPerformance.csv")


In [81]:
# Preview the first 5 rows loaded from StudentsPerformance.csv (5 is head()'s default)
df_exam.head(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
...,...,...,...,...,...,...,...,...
4,male,group C,some college,standard,none,76,78,75


In [82]:
# Display the whole DataFrame; how many rows are actually rendered is
# governed by pandas' display options (e.g. display.max_rows)
df_exam

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
...,...,...,...,...,...,...,...,...
999,female,group D,some college,free/reduced,none,77,86,86


In [83]:
# Preview the last 5 rows loaded from StudentsPerformance.csv (5 is tail()'s default)
df_exam.tail(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
995,female,group E,master's degree,standard,completed,88,99,95
...,...,...,...,...,...,...,...,...
999,female,group D,some college,free/reduced,none,77,86,86


In [84]:
# Show the last n rows of the DataFrame (here n = 10)
df_exam.tail(n=10)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
990,male,group E,high school,free/reduced,completed,86,81,75
...,...,...,...,...,...,...,...,...
999,female,group D,some college,free/reduced,none,77,86,86


In [85]:
# Read the shape attribute: a (number of rows, number of columns) tuple.
# Knowing the size first helps when deciding how much of the frame to display.
df_exam.shape

(1000, 8)

In [None]:

# Cap the number of rows pandas renders when a DataFrame is displayed.
# The DataFrame itself keeps all of its rows and columns; only the rendered
# view is shortened to a sample of rows from the start and the end of the
# frame, with "..." in between.
# Note: this is a global pandas option, so it also applies to later displays.

pd.options.display.max_rows = 5

df_exam

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
...,...,...,...,...,...,...,...,...
998,female,group D,some college,standard,completed,68,78,77
999,female,group D,some college,free/reduced,none,77,86,86
