In [1]:
# Writing text to a file

# Each record is one line of text: account number, name and balance,
# separated by single spaces and terminated by a newline.
account_lines = [
    "100 Jones 24.98\n",
    "200 Doe 345.98\n",
    "300 White 00.00\n",
    "400 Stone -42.16\n",
    "500 Rich 224.62\n",
]

# Mode "w" creates account.txt, or empties it first if it already exists.
with open("account.txt", mode="w") as accounts:
    accounts.writelines(account_lines)

In [4]:
# Print the raw contents of account.txt by running the shell's `cat` command.
!cat account.txt

100 Jones 24.98
200 Doe 345.98
300 White 00.00
400 Stone -42.16
500 Rich 224.62


In [9]:
# Reading a file object

with open("account.txt", mode="r") as accounts:
    # Header row. The first column holds the account number, so it is
    # labelled "Account" (not "Amount").
    print(f"{'Account' :<10} {'Name' :<10} {'Balance' :>10}")
    # Iterating over the file object yields one line (record) at a time.
    for record in accounts:
        # A blank line (e.g. a trailing one) splits into zero fields and
        # would break the three-way unpacking below, so skip it.
        if not record.strip():
            continue
        # Each record is "<account> <name> <balance>" separated by whitespace.
        account, name, balance = record.split()
        print(f"{account :<10} {name :<10} {balance :>10}")


Amount     Name          Balance
100        Jones           24.98
200        Doe            345.98
300        White           00.00
400        Stone          -42.16
500        Rich           224.62


In [16]:
# Updating text files
#
# Copy every record from account.txt into temp_file.txt, changing the name on
# account 300 to "Williams". account.txt itself is only read, never modified.

# Both files are opened inside a single `with` statement: if opening
# temp_file.txt fails, account.txt is still closed, and both are closed when
# the block ends.
with open("account.txt", "r") as accounts, open("temp_file.txt", "w") as temp_file:
    for record in accounts:
        account, name, balance = record.split()
        if account != "300":
            # Unchanged records are copied through exactly as read.
            temp_file.write(record)
        else:
            # Rebuild the record with the new name; split() dropped the
            # newline, so add it back.
            new_record = " ".join([account, "Williams", balance])
            temp_file.write(new_record + "\n")



In [17]:
# Print the raw contents of temp_file.txt by running the shell's `cat` command.
!cat temp_file.txt

100 Jones 24.98
200 Doe 345.98
300 Williams 00.00
400 Stone -42.16
500 Rich 224.62


In [18]:
# Working with JSON

import json

# One (student_id, name, grade) tuple per student; expanded into dicts below.
grade_rows = [
    (1, "Red", "A"),
    (2, "Green", "B"),
    (3, "White", "A"),
]

# The gradebook is a single-key dict whose value is a list of student records.
grades_dict = {
    "gradebook": [
        {"student_id": student_id, "name": name, "grade": grade}
        for student_id, name, grade in grade_rows
    ]
}

# Serialize the dict as JSON into grades.json (created or overwritten by mode "w").
with open("grades.json", mode="w") as grades:
    json.dump(grades_dict, grades)




In [19]:

# Parse grades.json back into Python objects, closing the file before printing.
with open("grades.json", "r") as grades:
    gradebook = json.load(grades)

# Re-serialize with four-space indentation so the structure is easy to read.
print(json.dumps(gradebook, indent=4))

{
    "gradebook": [
        {
            "student_id": 1,
            "name": "Red",
            "grade": "A"
        },
        {
            "student_id": 2,
            "name": "Green",
            "grade": "B"
        },
        {
            "student_id": 3,
            "name": "White",
            "grade": "A"
        }
    ]
}


In [24]:
# Working with CSV files


import csv


# One [account number, name, balance] row per account.
account_rows = [
    [100, "Jones", 345.67],
    [200, "Doe", 0.00],
    [300, "White", -42.16],
    [400, "Stone", 24.98],
    [500, "Rich", 224.62],
]

# newline="" leaves line endings to the csv module, as its documentation recommends.
with open("account.csv", mode="w", newline="") as accounts:
    writer = csv.writer(accounts)
    writer.writerows(account_rows)

In [28]:
# Reading the CSV file

with open("account.csv", mode="r", newline="") as accounts:
    # Header row, aligned with the columns printed below.
    print(f"{'Account':<10} {'Name':<10} {'Balance' :>10}")
    reader = csv.reader(accounts)
    # csv.reader yields each row as a list of strings; unpack the three
    # fields directly in the loop header.
    for account, name, balance in reader:
        print(f"{account:<10} {name:<10} {balance:>10}")

Account    Name          Balance
100        Jones          345.67
200        Doe               0.0
300        White          -42.16
400        Stone           24.98
500        Rich           224.62


In [29]:
# Using pandas with csv

import pandas as pd


# account.csv has no header row, so the column labels are supplied here.
# Passing names makes read_csv treat the first line as data; header=None
# states that explicitly.
account_columns = ["account", "name", "balance"]
data_Frame = pd.read_csv("account.csv", header=None, names=account_columns)

data_Frame

Unnamed: 0,account,name,balance
0,100,Jones,345.67
1,200,Doe,0.0
2,300,White,-42.16
3,400,Stone,24.98
4,500,Rich,224.62


In [30]:
# Saving a DataFrame as a CSV file

# index=False leaves the row index out, so only the data columns are written.
data_Frame.to_csv(
    "accounts_from_data_Frame.csv",
    index=False,
)

In [39]:
# Importing the Titanic dataset

# read_csv accepts a URL, so the file is downloaded when this cell runs.
titanic_url = "https://vincentarelbundock.github.io/Rdatasets/csv/carData/TitanicSurvival.csv"
titanic_DF = pd.read_csv(titanic_url)


# Looking at the first 5 records
titanic_DF.head(5)


Unnamed: 0.1,Unnamed: 0,survived,sex,age,passengerClass
0,"Allen, Miss. Elisabeth Walton",yes,female,29.0,1st
1,"Allison, Master. Hudson Trevor",yes,male,0.9167,1st
2,"Allison, Miss. Helen Loraine",no,female,2.0,1st
3,"Allison, Mr. Hudson Joshua Crei",no,male,30.0,1st
4,"Allison, Mrs. Hudson J C (Bessi",no,female,25.0,1st


In [40]:
# Looking at the last 5 records

titanic_DF.tail(5)

Unnamed: 0.1,Unnamed: 0,survived,sex,age,passengerClass
1304,"Zabour, Miss. Hileni",no,female,14.5,3rd
1305,"Zabour, Miss. Thamine",no,female,,3rd
1306,"Zakarian, Mr. Mapriededer",no,male,26.5,3rd
1307,"Zakarian, Mr. Ortin",no,male,27.0,3rd
1308,"Zimmerman, Mr. Leo",no,male,29.0,3rd


In [41]:
# Renaming columns

# Labels are assigned by position, so there must be exactly one per existing
# column, in the same order as the columns appear in the frame.
column_labels = ["Name", "Survived", "Sex", "Age", "Class"]
titanic_DF.columns = column_labels

titanic_DF.head(5)

Unnamed: 0,Name,Survived,Sex,Age,Class
0,"Allen, Miss. Elisabeth Walton",yes,female,29.0,1st
1,"Allison, Master. Hudson Trevor",yes,male,0.9167,1st
2,"Allison, Miss. Helen Loraine",no,female,2.0,1st
3,"Allison, Mr. Hudson Joshua Crei",no,male,30.0,1st
4,"Allison, Mrs. Hudson J C (Bessi",no,female,25.0,1st


In [42]:
# Simple data analysis

# By default describe() summarizes only the numeric columns and leaves NaN
# values out of the statistics.
summary_stats = titanic_DF.describe()
summary_stats

Unnamed: 0,Age
count,1046.0
mean,29.881135
std,14.4135
min,0.1667
25%,21.0
50%,28.0
75%,39.0
max,80.0


In [43]:
# Checking for survived passengers

# Compare every Survived value with 'yes' to get a boolean Series, then
# summarize it: `top` is the more frequent value and `freq` is how often it occurs.
survived_mask = titanic_DF["Survived"].eq("yes")
survived_mask.describe()

count      1309
unique        2
top       False
freq        809
Name: Survived, dtype: object

In [44]:
# Enable matplotlib's interactive support; with no argument IPython selects the
# default GUI backend and reports which one it picked.
%matplotlib

Using matplotlib backend: MacOSX


In [52]:
# DataFrame.hist() draws a histogram for each numeric column and returns the
# resulting axes objects in an array.
histogram = titanic_DF.hist()


# Echo the returned array of axes objects.
histogram

array([[<AxesSubplot:title={'center':'Age'}>]], dtype=object)