1. Reading CSV Files in Python

1.1 Using csv.reader

In [0]:
import csv

# Read the CSV with csv.reader: the first record becomes `header`, every
# remaining record is collected into `rows` as a list of string fields.
# newline='' is what the csv module documentation asks for when opening a file
# for the reader, so that newlines embedded inside quoted fields are parsed
# correctly instead of being translated by the text layer.
with open("/Volumes/hexaware_databricks/default/file/cpudatatextad.csv", 'r', newline='') as file:
    csvreader = csv.reader(file)
    # Default to an empty header so a completely empty file does not raise
    # StopIteration here.
    header = next(csvreader, [])
    # The reader is an iterator; list() drains everything after the header.
    rows = list(csvreader)

print(header)
print(rows)


['Processor', 'Cores', 'Speed(GHz)', 'Price']
[['Intel i3', '2', '3.1', '120'], ['Intel i5', '4', '3.5', '200'], ['Intel i7', '8', '3.9', '350']]


1.2 Using .readlines()

In [0]:
# Read the file as raw text lines: no CSV parsing happens here, so each
# element of `content` is one unparsed line including its trailing newline.
with open('/Volumes/hexaware_databricks/default/file/cpudatatextad.csv') as source:
    content = source.readlines()

# Slicing (rather than indexing) keeps `header` a one-element list.
header, rows = content[:1], content[1:]

for part in (header, rows):
    print(part)

['Processor,Cores,Speed(GHz),Price\n']
['Intel i3,2,3.1,120\n', 'Intel i5,4,3.5,200\n', 'Intel i7,8,3.9,350\n']


1.3 Using Pandas

In [0]:
import pandas as pd

# Load the salary CSV into a DataFrame; read_csv takes the column names from
# the first line of the file by default.
salary_csv = "/Volumes/hexaware_databricks/default/file/Salary_Data.csv"
data = pd.read_csv(filepath_or_buffer=salary_csv)
print(data)

# Column labels of the loaded frame
print(data.columns)

# Select the single 'Salary' column (all rows) as a Series
print(data.loc[:, 'Salary'])

   YearsExperience  Salary
0              1.1   39343
1              1.3   46205
2              1.5   37731
3              2.0   43525
4              2.2   39891
Index(['YearsExperience', 'Salary'], dtype='object')
0    39343
1    46205
2    37731
3    43525
4    39891
Name: Salary, dtype: int64


1.4 Using csv.DictReader

In [0]:
import csv

# Print every data row as a dict. csv.DictReader uses the first row of the
# file as the keys, and every value is delivered as a string.
# newline='' follows the csv module documentation so that newlines embedded in
# quoted fields are handled by the parser rather than the text layer.
with open('/Volumes/hexaware_databricks/default/file/Salary_Data.csv', 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row)

{'YearsExperience': '1.1', 'Salary': '39343'}
{'YearsExperience': '1.3', 'Salary': '46205'}
{'YearsExperience': '1.5', 'Salary': '37731'}
{'YearsExperience': '2.0', 'Salary': '43525'}
{'YearsExperience': '2.2', 'Salary': '39891'}


2. Writing CSV Files in Python

2.1 Using csv.writer

In [0]:
import csv

header = ['Name', 'M1 Score', 'M2 Score']
data = [['Alex', 62, 80], ['Brad', 45, 56], ['Joey', 85, 98]]

filename = '/Volumes/hexaware_databricks/default/file/Students_Data.csv'

# newline="" leaves line endings to the csv writer.
with open(filename, 'w', newline="") as out_file:
    csvwriter = csv.writer(out_file)
    # Header and data rows go out in a single call, header first.
    csvwriter.writerows([header, *data])

2.2 Using .writelines()

In [0]:
header = ['Name', 'M1 Score', 'M2 Score']
data = [['Alex', 62, 80], ['Brad', 45, 56], ['Joey', 85, 98]]

filename = 'Student_scores.csv'

# Build one text line per record (header first). Joining with ',' puts the
# delimiter only *between* values, so no line ends with a dangling ", " that
# would show up as an extra empty column and leading spaces when read back.
# NOTE: a plain join does no quoting or escaping; values that may contain
# commas, quotes or newlines should be written with csv.writer instead.
lines = [
    ','.join(str(value) for value in record) + '\n'
    for record in [header, *data]
]

with open(filename, 'w') as file:
    # writelines() does not append line separators itself, which is why each
    # entry in `lines` already ends with '\n'.
    file.writelines(lines)

2.3 Using Pandas

In [0]:
import pandas as pd

header = ['Name', 'M1 Score', 'M2 Score']
data = [['Alex', 62, 80], ['Brad', 45, 56], ['Joey', 85, 98]]

# Transpose the row-wise records into columns and label each with its header
# entry; the resulting frame has the same columns and values as building it
# row by row.
df = pd.DataFrame(dict(zip(header, zip(*data))))

# index=False keeps the row index out of the written file.
df.to_csv(path_or_buf='Stu_data.csv', index=False)

2.4 Using csv.DictWriter

In [0]:
import csv

data = [
    {'Name': 'Alex', 'M1 Score': 62, 'M2 Score': 80},
    {'Name': 'Brad', 'M1 Score': 45, 'M2 Score': 56},
    {'Name': 'Joey', 'M1 Score': 85, 'M2 Score': 98},
]

# Column order of the output file; the keys match those of each record above.
fieldnames = ['Name', 'M1 Score', 'M2 Score']

with open('/Volumes/hexaware_databricks/default/file/Students_Data.csv', 'w', newline='') as out_file:
    dict_writer = csv.DictWriter(out_file, fieldnames=fieldnames)
    # Header line first, then one line per record.
    dict_writer.writeheader()
    for record in data:
        dict_writer.writerow(record)

3. Converting CSV to List

3.1 Method 1: Using CSV Module

In [0]:
import csv

# Materialise the whole CSV as a list of rows, each row being a list of
# string fields. No header handling is done here, so the first line of the
# file ends up as the first element of the list.
# newline='' follows the csv module documentation so that newlines embedded in
# quoted fields are handled by the parser rather than the text layer.
with open('/Volumes/hexaware_databricks/default/file/sample.csv', 'r', newline='') as read_obj:
    csv_reader = csv.reader(read_obj)
    list_of_csv = list(csv_reader)

display(list_of_csv)

_1,_2,_3,_4
JAN,34,360,417
FEB,31,342,391
MAR,36,406,419
APR,34,396,461
MAY,36,420,472
JUN,43,472,535


3.2 Method 2: Iterating Over CSV

In [0]:
import csv

# Walk the CSV row by row and, for each row, print: the whole row as a list,
# its first field, its first three fields separated by spaces, and a spacer.
# newline='' follows the csv module documentation so that newlines embedded in
# quoted fields are handled by the parser rather than the text layer.
with open('/Volumes/hexaware_databricks/default/file/example.csv', newline='') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        if not row:
            # csv.reader yields [] for a blank line; row[0] would raise
            # IndexError, so such lines are skipped.
            continue
        print(row)
        print(row[0])
        # Slicing tolerates rows with fewer than three fields; for rows with
        # three or more it prints exactly row[0], row[1], row[2].
        print(*row[:3])
        print("\n")

['Name', 'Age', 'City']
Name
Name Age City


['Alice', '25', 'New York']
Alice
Alice 25 New York


['Bob', '30', 'London']
Bob
Bob 30 London


['Charlie', '28', 'Sydney']
Charlie
Charlie 28 Sydney




3.3 Method 3: Using Pandas

In [0]:
import pandas as pd

# Column-oriented data: each key is a column label, each list holds that
# column's values in row order.
dict_data = dict(
    series=['Friends', 'Money Heist', 'Marvel'],
    episodes=[200, 50, 45],
    actors=['David Crane', 'Alvaro', 'Stan Lee'],
)

# from_dict's default orientation treats the dict keys as columns.
df = pd.DataFrame.from_dict(dict_data)
display(df)

series,episodes,actors
Friends,200,David Crane
Money Heist,50,Alvaro
Marvel,45,Stan Lee
