# Column Manipulation of Pandas DataFrames

### 1. Import libraries and dependencies

In [3]:
# Import pandas, pathlib, and numpy libraries
import pandas as pd
from pathlib import Path
import numpy as np
%matplotlib inline

### 2. Create a Path to the File Using Pathlib

In [4]:
# Use the Pathlib libary to set the path to the CSV
people_reordered_data = Path("../Resources/people_reordered.csv")
people_data = Path("../Resources/people.csv")

### 3. Read the CSV into a Pandas DataFrame and Display a Few Rows

In [5]:
# Use the file path to read the CSV into a DataFrame and display a few rows
people_reordered_df = pd.read_csv(people_reordered_data)
people_df = pd.read_csv(people_data)
people_df.head()

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 4. View Column Names

In [7]:
# Use the `columns` attribute to output the column names
people_df.columns

Index(['id', 'first_name', 'last_name', 'email', 'gender', 'uni_grad',
       'job_title', 'Income'],
      dtype='object')

### 5. View Column Data Types

In [8]:
# Use the `dtypes` attribute to output the column names and data types
people_df.dtypes

id            float64
first_name     object
last_name      object
email          object
gender         object
uni_grad       object
job_title      object
Income        float64
dtype: object

### 6. Rename Columns Using List

In [12]:
# Set the `columns` attribute to a new list of column names
column_names = ['Person_ID', 'First_Name', 'Last_Name', 'Email', 'Gender', 'University', 'Occupation', 'Salary']
people_df.columns = column_names
people_df

Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0
...,...,...,...,...,...,...,...,...
995,996.0,Meta,Crumpton,mcrumptonrn@qq.com,Female,ECAM - Institut Supérieur Industriel,Registered Nurse,57060.0
996,997.0,Gunar,Gilford,ggilfordro@yandex.ru,Male,Smolny University,Marketing Manager,76109.0
997,998.0,Lucretia,Gurling,lgurlingrp@de.vu,Female,Institut Teknologi Telkom,Software Engineer III,92115.0
998,999.0,Andrew,Yang,ayang@codedrills.com,Male,Rutgers University School of Business,Curriculum Engineer,60000.0


### 7. Rename Columns Using Dictionary

In [13]:
# Use the `rename` function and set the `columns` parameter to a dictionary of new column names
column_rename = {"Person_ID":"Person_ID", "First_Name":"First_Name", "Last_Name":"Last_Name", "Email":"Email_Address", "Gender":"Gender", "Univerity":"University", "Occupation":"Occupation", "Salary": "Salary"}
people_df.rename(columns=column_rename)

Unnamed: 0,Person_ID,First_Name,Last_Name,Email_Address,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0
...,...,...,...,...,...,...,...,...
995,996.0,Meta,Crumpton,mcrumptonrn@qq.com,Female,ECAM - Institut Supérieur Industriel,Registered Nurse,57060.0
996,997.0,Gunar,Gilford,ggilfordro@yandex.ru,Male,Smolny University,Marketing Manager,76109.0
997,998.0,Lucretia,Gurling,lgurlingrp@de.vu,Female,Institut Teknologi Telkom,Software Engineer III,92115.0
998,999.0,Andrew,Yang,ayang@codedrills.com,Male,Rutgers University School of Business,Curriculum Engineer,60000.0


### 8. Re-order Columns

In [15]:
# Use a list of re-ordered column names to alter the column order of the original DataFrame
people_df.columns
people_df =people_df[['Person_ID', 'First_Name', 'Last_Name', 'Gender', 'University', 'Occupation', 'Salary', 'Email']]
people_df

Unnamed: 0,Person_ID,First_Name,Last_Name,Gender,University,Occupation,Salary,Email
0,1.0,Keriann,Lenormand,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com
1,2.0,Huntley,Rupke,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com
2,3.0,Gorden,Dalgarnowch,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com
3,4.0,Cullie,,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com
4,5.0,Ariel,Strangman,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com
...,...,...,...,...,...,...,...,...
995,996.0,Meta,Crumpton,Female,ECAM - Institut Supérieur Industriel,Registered Nurse,57060.0,mcrumptonrn@qq.com
996,997.0,Gunar,Gilford,Male,Smolny University,Marketing Manager,76109.0,ggilfordro@yandex.ru
997,998.0,Lucretia,Gurling,Female,Institut Teknologi Telkom,Software Engineer III,92115.0,lgurlingrp@de.vu
998,999.0,Andrew,Yang,Male,Rutgers University School of Business,Curriculum Engineer,60000.0,ayang@codedrills.com


### 9. Create Additional Column

In [21]:
# Use the `randint` function to randomly generate an `Age` from 22 to 65 for 1000 rows

people_df['Age'] = np.random.randint(low=22, high=65, size=1000)
people_df['Age_Copy'] = np.random.randint(low=22, high=65, size=1000)
people_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  people_df['Age'] = np.random.randint(low=22, high=65, size=1000)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  people_df['Age_Copy'] = np.random.randint(low=22, high=65, size=1000)


Unnamed: 0,Person_ID,First_Name,Last_Name,Gender,University,Occupation,Salary,Email,Age,Age_Copy
0,1.0,Keriann,Lenormand,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com,56,49
1,2.0,Huntley,Rupke,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com,31,25
2,3.0,Gorden,Dalgarnowch,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com,52,60
3,4.0,Cullie,,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com,62,49
4,5.0,Ariel,Strangman,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com,60,43
...,...,...,...,...,...,...,...,...,...,...
995,996.0,Meta,Crumpton,Female,ECAM - Institut Supérieur Industriel,Registered Nurse,57060.0,mcrumptonrn@qq.com,64,53
996,997.0,Gunar,Gilford,Male,Smolny University,Marketing Manager,76109.0,ggilfordro@yandex.ru,63,44
997,998.0,Lucretia,Gurling,Female,Institut Teknologi Telkom,Software Engineer III,92115.0,lgurlingrp@de.vu,31,25
998,999.0,Andrew,Yang,Male,Rutgers University School of Business,Curriculum Engineer,60000.0,ayang@codedrills.com,38,60


### 10. Delete Columns

In [23]:
# Use the `drop` function to delete the newly created `Age_Copy` column
people_df = people_df.drop(columns='Age_Copy')
people_df

Unnamed: 0,Person_ID,First_Name,Last_Name,Gender,University,Occupation,Salary,Email,Age
0,1.0,Keriann,Lenormand,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com,56
1,2.0,Huntley,Rupke,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com,31
2,3.0,Gorden,Dalgarnowch,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com,52
3,4.0,Cullie,,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com,62
4,5.0,Ariel,Strangman,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com,60
...,...,...,...,...,...,...,...,...,...
995,996.0,Meta,Crumpton,Female,ECAM - Institut Supérieur Industriel,Registered Nurse,57060.0,mcrumptonrn@qq.com,64
996,997.0,Gunar,Gilford,Male,Smolny University,Marketing Manager,76109.0,ggilfordro@yandex.ru,63
997,998.0,Lucretia,Gurling,Female,Institut Teknologi Telkom,Software Engineer III,92115.0,lgurlingrp@de.vu,31
998,999.0,Andrew,Yang,Male,Rutgers University School of Business,Curriculum Engineer,60000.0,ayang@codedrills.com,38


### 11. Save Modified DataFrame to New CSV

In [24]:
# Save the DataFrame to the `Resources` folder
people_df.to_csv("../Resources/people_df.csv")