In [12]:
import pandas as pd
import numpy as np

In [8]:
# Column-oriented sample records: each key is a column name and each list
# holds that column's values, one entry per person.
data = {
    'name': ['Murunga', 'James', 'Eburu', 'William', 'Wanyonyi'],
    'city': ['Kitale', 'Kilifi', 'Kisii', 'Eldoret', 'Thika'],
    'age': [23, 25, 24, 55, 38],
    'math-score': [87.0, 62.0, 51.0, 70.0, 65.0],
}

# Custom row labels 101..105, one per record above.
row_labels = list(range(101, 106))


In [11]:
# Build the frame from the column dict and label its rows with the custom ids.
df = pd.DataFrame(data, index=row_labels)
df

Unnamed: 0,name,city,age,math-score
101,Murunga,Kitale,23,87.0
102,James,Kilifi,25,62.0
103,Eburu,Kisii,24,51.0
104,William,Eldoret,55,70.0
105,Wanyonyi,Thika,38,65.0


If the index argument is not given, the DataFrame defaults to a RangeIndex (0, 1, 2, …):

In [16]:
# No index is supplied here, so the rows receive the default 0-based labels.
df = pd.DataFrame(data)
df

Unnamed: 0,name,city,age,math-score
0,Murunga,Kitale,23,87.0
1,James,Kilifi,25,62.0
2,Eburu,Kisii,24,51.0
3,William,Eldoret,55,70.0
4,Wanyonyi,Thika,38,65.0


It’s possible to control the order of the columns with the columns parameter and the row labels with index:

In [15]:
# `columns` picks and orders the dict keys; `index` supplies the row labels.
df = pd.DataFrame(
    data,
    columns=['city', 'name', 'math-score', 'age'],
    index=row_labels,
)
df

Unnamed: 0,city,name,math-score,age
101,Kitale,Murunga,87.0,23
102,Kilifi,James,62.0,25
103,Kisii,Eburu,51.0,24
104,Eldoret,William,70.0,55
105,Thika,Wanyonyi,65.0,38


Creating a DataFrame from a nested list (a list of lists)

In [39]:
# Labels first: one row per student, one column per subject.
student_names = ['Jerry', 'Peter', 'John']
subjects = ['history', 'geography', 'maths', 'english', 'kiswahili', 'business', 'agriculture']

# Each inner list is one student's marks, ordered like `subjects`.
student_marks = [
    [50, 78, 80, 34, 68, 99, 95],
    [45, 55, 65, 75, 80, 85, 90],
    [31, 41, 51, 61, 71, 81, 91],
]

subject_marks = pd.DataFrame(student_marks, index=student_names, columns=subjects)
subject_marks

Unnamed: 0,history,geography,maths,english,kiswahili,business,agriculture
Jerry,50,78,80,34,68,99,95
Peter,45,55,65,75,80,85,90
John,31,41,51,61,71,81,91


using a list of dictionaries

In [23]:
# One dict per student, keyed by subject name; the dict keys become the
# DataFrame columns, so no `columns` argument is needed.
student_marks = [
    dict(zip(subjects, marks))
    for marks in (
        (50, 78, 80, 34, 68, 99, 95),
        (45, 55, 65, 75, 80, 85, 90),
        (31, 41, 51, 61, 71, 81, 91),
    )
]

subject_marks = pd.DataFrame(student_marks, index=student_names)
subject_marks

Unnamed: 0,history,geography,maths,english,kiswahili,business,agriculture
Jerry,50,78,80,34,68,99,95
Peter,45,55,65,75,80,85,90
John,31,41,51,61,71,81,91


constructing dataframes from arrays

In [34]:
# Source array for the memory-sharing demonstration.
arr = np.array([[1, 2, 100],
                [2, 4, 100],
                [3, 8, 100]])

# copy=False is passed explicitly so the frame shares memory with `arr`
# regardless of the pandas version: older releases do not copy a 2-D array by
# default, while releases with Copy-on-Write enabled copy NumPy input unless
# told otherwise.
df_ = pd.DataFrame(arr, columns=['x', 'y', 'z'], copy=False)
df_

Unnamed: 0,x,y,z
0,1,2,100
1,2,4,100
2,3,8,100


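When copy is left at its default (None), pandas versions before 3.0 treat a 2-D NumPy array as if copy=False had been passed, so the data from the NumPy array isn’t copied. This means the DataFrame shares memory with the original array: if you modify the array, your DataFrame will change too. Newer pandas releases with Copy-on-Write enabled copy NumPy input by default, so pass copy=False explicitly if you want this sharing behaviour there.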

In [35]:
# Overwrite the top-left element of the source array in place; the frame is
# displayed again in the next cell.
arr[0, 0] = 100

In [36]:
# Display df_ again after the in-place write to arr.
df_

Unnamed: 0,x,y,z
0,100,2,100
1,2,4,100
2,3,8,100


In [37]:
# Same values as before, but copy=True asks pandas to copy the array's data
# into the frame instead of referencing `arr`.
arr = np.array([[1, 2, 100], [2, 4, 100], [3, 8, 100]])
df_ = pd.DataFrame(data=arr, columns=list('xyz'), copy=True)
df_

Unnamed: 0,x,y,z
0,1,2,100
1,2,4,100
2,3,8,100


Creating a pandas DataFrame from a file

In [40]:
# Write the marks table to student_marks.csv (relative path, so it lands in
# the current working directory).
subject_marks.to_csv('student_marks.csv')

index_col=0 specifies that the row labels are located in the first column of the CSV file.

In [45]:
# Load the CSV back; index_col=0 uses the file's first column as row labels.
st_data = pd.read_csv('student_marks.csv',index_col=0)
st_data

Unnamed: 0,history,geography,maths,english,kiswahili,business,agriculture
Jerry,50,78,80,34,68,99,95
Peter,45,55,65,75,80,85,90
John,31,41,51,61,71,81,91


You can get the DataFrame’s row labels with .index and its column labels with .columns:

In [49]:
# Row labels of the loaded frame.
st_data.index

Index(['Jerry', 'Peter', 'John'], dtype='object')

modifying row labels for a dataframe

In [55]:
# Assigning to .index replaces the row labels on st_data itself
# (three new labels for the three rows loaded from the CSV).
st_data.index = ["Daniel","Murunga","Julius"]

In [56]:
# Confirm the row labels now show the newly assigned names.
st_data.index

Index(['Daniel', 'Murunga', 'Julius'], dtype='object')

In [51]:
# Column labels of the loaded frame.
st_data.columns

Index(['history', 'geography', 'maths', 'english', 'kiswahili', 'business',
       'agriculture'],
      dtype='object')

In [52]:
# Positional access on the columns Index: [1] is the second label.
st_data.columns[1]

'geography'

In [53]:
# Slicing the columns Index yields another Index (the first four labels here).
st_data.columns[0:4]

Index(['history', 'geography', 'maths', 'english'], dtype='object')

changing column labels

In [57]:
# Replace all seven column labels at once with abbreviated subject names.
st_data.columns = [
    'histo', 'geo', 'math', 'eng', 'kisw', 'bsn', 'agri',
]

In [58]:
# Confirm the column labels now show the abbreviated names.
st_data.columns

Index(['histo', 'geo', 'math', 'eng', 'kisw', 'bsn', 'agri'], dtype='object')