# Column Manipulation of Pandas DataFrames

### 1. Import libraries and dependencies

In [1]:
# Import pandas, pathlib, and numpy libraries
import pandas as pd
from pathlib import Path
import numpy as np

### 2. Create a Path to the File Using Pathlib

In [2]:
# Use the Pathlib libary to set the path to the CSV
csvpath = Path("../Resources/people.csv")

### 3. Read the CSV into a Pandas DataFrame and Display a Few Rows

In [3]:
# Use the file path to read the CSV into a DataFrame and display a few rows
people_df = pd.read_csv(csvpath)
people_df.head()

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 4. View Columns Names

In [4]:
# Use the `columns` attribute to output the column names
people_df.columns

Index(['id', 'first_name', 'last_name', 'email', 'gender', 'uni_grad',
       'job_title', 'Income'],
      dtype='object')

### 5. View Column Data Types

In [5]:
# Use the `dtypes` attribute to output the column names and data types
people_df.dtypes

id            float64
first_name     object
last_name      object
email          object
gender         object
uni_grad       object
job_title      object
Income        float64
dtype: object

### 6. Rename Columns Using List

In [6]:
# Set the `columns` attribute to a new list of column names
columns = ["Person_ID", "First_Name", "Last_Name", "Email", "Gender", "University", "Occupation", "Salary"]
people_df.columns = columns
people_df.head()

Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 7. Rename Columns Using Dictionary

In [7]:
# Use the `rename` function and set the `columns` parameter to a dictionary of new column names
people_df = people_df.rename(columns={
    "id": "Person_ID",
    "first_name": "First_Name",
    "last_name": "Last_Name", 
    "email": "Email",
    "gender": "Gender",
    "uni_grad": "University_Grad",
    "job_title": "Occupation",
    "Income": "Salary"
})

people_df.head()

Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 8. Re-order Columns

In [17]:
people_df[people_df["Person_ID"]>40]
# people_df.sort_values("Salary")

Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
40,41.0,Gilberto,Trulock,gtrulock14@google.com.hk,Male,Universidad Centroamericana de Ciencias Empres...,Desktop Support Technician,50013.0
41,42.0,Horatius,Reilly,hreilly15@bing.com,Male,Preston Institute of Management Sciences and T...,Systems Administrator II,108483.0
42,43.0,Kati,Dodgshun,kdodgshun16@i2i.jp,Female,Rivers State University of Science and Technology,Legal Assistant,70565.0
43,44.0,Jillie,Bickle,jbickle17@cargocollective.com,Female,University of Insurance and Banking in Warsaw,Operator,116522.0
44,45.0,Hadlee,Jenicek,hjenicek18@histats.com,Male,Krishna University,VP Product Management,90837.0
45,46.0,Eba,MacCague,emaccague19@omniture.com,Female,Toin University of Yokohama,Graphic Designer,90212.0
46,47.0,Darya,Pettie,dpettie1a@walmart.com,Female,Instituto Universitario de La Paz,Environmental Tech,57954.0
47,48.0,Edmund,Tupper,etupper1b@ted.com,Male,Hokkaido University of Education,VP Product Management,112162.0
48,49.0,Fredrick,Angove,fangove1c@newyorker.com,Male,Bahcesehir University,Desktop Support Technician,83617.0
49,50.0,Gabrila,Semeniuk,gsemeniuk1d@harvard.edu,Female,Changwon National University,Programmer Analyst II,94271.0


In [8]:
# Use a list of re-ordered column names to alter the column order of the original DataFrame
people_df = people_df[["Person_ID", "Last_Name", "First_Name", "Gender", "University", "Occupation", "Salary", "Email"]]
people_df.head()

Unnamed: 0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email
0,1.0,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com
1,2.0,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com
2,3.0,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com
3,4.0,Putten,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com
4,5.0,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com


### 9. Create Additional Column

In [9]:
# Use the `randint` function to randomly generate an `Age` from 22 to 65 for 1000 rows
people_df["Age"] = np.random.randint(low=22, high=65, size=1000)
people_df["Age_Copy"] = np.random.randint(low=22, high=65, size=1000)
people_df.head()

Unnamed: 0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,Age,Age_Copy
0,1.0,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com,27,42
1,2.0,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com,22,51
2,3.0,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com,40,43
3,4.0,Putten,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com,62,56
4,5.0,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com,47,45


### 10. Delete Columns

In [19]:
people_df.dtypes

Person_ID     float64
First_Name     object
Last_Name      object
Email          object
Gender         object
University     object
Occupation     object
Salary        float64
dtype: object

In [20]:
# Use the `drop` function to delete the newly created `Age` column
people_df = people_df.drop(columns=["First_Name"])
people_df.head()

Unnamed: 0,Person_ID,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 11. Save Modified DataFrame to New CSV

In [11]:
# Save the DataFrame to the `Resources` folder
people_df.to_csv("../Resources/people_reordered.csv")