# String formatting

## The legacy method (`%` operator)

In [1]:
import math
# %-formatting: the values on the right replace the conversion specifiers in the string on the left
print('%5.3f' % (math.pi))   # float, minimum width 5, 3 decimal places
print('%d, %d, %d' %(1, 2, 3))   # several values are supplied as a tuple
print('"%10s", "%-10s"' % ('test', 'test'))  # width 10: right-aligned by default, '-' left-aligns
print('%.5s' % ('xylophone',))  # a precision on a string truncates it to 5 characters

3.142
1, 2, 3
"      test", "test      "
xylop


## str.format() method

In [2]:
# member and element access (argument position; argument name; index; attribute)
print('{0}, {1}, {2}'.format(1, 2, 3))                                     # Access arguments by ordinal position
print('{value1}, {value2}, {value2}'.format(value1=1, value2=2, value3=3)) # Access keyword arguments by name; a name may repeat, and the unused value3 is ignored
print('{[1]}'.format(['first', 'second', 'third']))                        # Access element by index
print("Pi is {} and e is {}".format(math.pi, math.e))                      # Empty braces take the arguments in order
print("Pi is {0.pi} and e is {0.e}".format(math))                          # Access attributes of an argument

1, 2, 3
1, 2, 2
second
Pi is 3.141592653589793 and e is 2.718281828459045
Pi is 3.141592653589793 and e is 2.718281828459045


In [3]:
# conversion flags: !s converts the value with str(), !r converts it with repr()
print("Harold's a clever {0!s}".format('dog')) # !s calls str()
print("Harold's a clever {0!r}".format('dog')) # !r calls repr(), hence the quotes in the output

Harold's a clever dog
Harold's a clever 'dog'


In [4]:
# alignment inside a field of width 10: '<' left, '>' right, '^' center
print('"{:<10} left aligned"'.format('text'))
print('"{:>10} right aligned"'.format('text'))
print('"{:^10} center aligned"'.format('text'))

"text       left aligned"
"      text right aligned"
"   text    center aligned"


In [5]:
# Specify only a width: a string is left-aligned and padded with spaces by default
'hey {:10}'.format('hello')

'hey hello     '

In [6]:
# width vs. precision on a long string
print('{:5}'.format('xylophone'))   # a width is only a minimum: nothing is cut off
print('{:.5}'.format('xylophone'))  # a precision truncates the string to 5 characters

xylophone
xylop


In [7]:
# width + truncation: cut the string to 5 characters, then center it in a field of width 10
print('"{:^10.5}"'.format('xylophone'))

"  xylop   "


In [8]:
# binary and character representation of an integer
print('{0:b} is the binary value of {0}'.format(6))   # 'b': base 2

print('{0:c} is character of {0}'.format(100))        # 'c': the character with that code point

110 is the binary value of 6
d is character of 100


In [9]:
# numerical representation: integer numbers
print('{:d}'.format(123))        # 'd': decimal integer
print('{:5d}'.format(123))       # minimum width 5; numbers are right-aligned by default
print('{:,}'.format(1000000))    # ',' inserts a thousands separator

123
  123
1,000,000


In [10]:
# numerical representation: float numbers
print('{:e}'.format(0.0000000001))    # 'e': scientific notation
print('{:E}'.format(0.0000000001))    # 'E': the same, with an upper-case exponent marker
print('{:f}'.format(3/14.0))          # 'f': fixed point, 6 decimal places by default
print('{:g}'.format(30000/14.0))      # 'g': general format, 6 significant digits by default ...
print('{:g}'.format(30000000/14.0))   # ... which switches to scientific notation for large magnitudes
print('{:%}'.format(0.66))            # '%': the value times 100, followed by a percent sign
print('{:.3}'.format(10/3))           # a precision without a type: 3 significant digits

1.000000e-10
1.000000E-10
0.214286
2142.86
2.14286e+06
66.000000%
3.33


In [11]:
# padding with zeros to width 10, combined with each alignment
print('{:>010}'.format(2))   # right-aligned: zeros go in front
print('{:^010}'.format(2))   # centered: zeros on both sides
print('{:<010}'.format(2))   # left-aligned: zeros go behind (this changes how the number reads!)

0000000002
0000200000
2000000000


In [12]:
"{:*^20s}".format("Hello")

'*******Hello********'

In [13]:
# using str repetition to repeat identical format specs
print(('{:10.3f}' * 3).format(1.23456, 2.34567, 3.45678))   # three fields, each width 10 with 3 decimals
print(('Number {:2d}:{:10.3f}\n' * 3).format(1, 1.23456, 2, 2.34567, 3, 3.45678))   # the template is repeated as three lines

     1.235     2.346     3.457
Number  1:     1.235
Number  2:     2.346
Number  3:     3.457



In [14]:
# the built-in format() function formats a single value using the same format spec syntax
print(format(math.pi, '<9.5f'))   # left-aligned, width 9, 5 decimals
print(format(math.pi, '^9.5f'))   # centered
print(format(math.pi, '>9.5f'))   # right-aligned
print(format(math.pi, '12.5e'))   # scientific notation, width 12

3.14159  
 3.14159 
  3.14159
 3.14159e+00


In [15]:
# however, format() has no advantage over the str.format() method: the same specs give the same output
print('{:<9.5f}'.format(math.pi))
print('{:^9.5f}'.format(math.pi))
print('{:>9.5f}'.format(math.pi))
print('{:12.5e}'.format(math.pi))

3.14159  
 3.14159 
  3.14159
 3.14159e+00


In [16]:
# datetime objects take strftime-style directives (%Y, %m, %d, ...) as their format spec
from datetime import datetime
'{:%Y-%m-%d %H:%M}'.format(datetime(2001, 2, 3, 4, 5))

'2001-02-03 04:05'

## f-string method (formatted string literals)

In [17]:
# an f-string replaces each {expression} with its value, using the variables in scope
val = 'virus'
print(f"Some {val} are good some {val} are bad.") 

Some virus are good some virus are bad.


In [18]:
name = 'Peter'
age = 23

# the same sentence produced with each of the three formatting methods
print('%s is %d years old' % (name, age))       # legacy % operator
print('{} is {} years old'.format(name, age))   # str.format()
print(f'{name} is {age} years old')             # f-string

Peter is 23 years old
Peter is 23 years old
Peter is 23 years old


In [19]:
# dictionary lookups can be written directly inside the braces;
# note the keys use single quotes because the f-string itself is delimited by double quotes
user = {'name': 'John Doe', 'occupation': 'gardener'}
print(f"{user['name']} is a {user['occupation']}")

John Doe is a gardener


In [20]:
# Import the class, not the module: an earlier cell binds the name `datetime`
# to the class via `from datetime import datetime`, and `import datetime` would
# silently rebind it to the module, breaking any later `datetime(...)` call.
from datetime import datetime
now = datetime.now()
# the format spec after ':' is passed to the datetime object as strftime directives
print(f"{now:%B %d, %Y}")
print(f'{now:%Y-%m-%d %H:%M}')

February 12, 2020
2020-02-12 17:28


In [21]:
# you can put any expression in it, including a call to your own function
def mymax(x, y):
    """Return x when it is strictly greater than y, otherwise return y."""
    if x > y:
        return x
    return y

a = 3
b = 4

# the call mymax(a, b) is evaluated inside the f-string and its result is inserted
print(f'Max of {a} and {b} is {mymax(a, b)}')

Max of 3 and 4 is 4


In [22]:
# when the f-string is better than str.format()
first_name = 'Donald'
last_name = 'Trump'
age = 73
profession = 'President'
affiliation = 'U.S. Goverment'


# using str.format(): every placeholder must be passed again as a keyword argument.
# Adjacent string literals are joined by the parser, and lines inside parentheses
# continue implicitly, so neither '+' nor backslashes are needed.
greeting_template = (
    "Hello, {first_name} {last_name}. You are {age}. "
    "You are a {profession}. You are a member of {affiliation}."
)
print(greeting_template.format(
    first_name=first_name,
    last_name=last_name,
    age=age,
    profession=profession,
    affiliation=affiliation,
))

Hello, Donald Trump. You are 73. You are a President. You are a member of U.S. Goverment.


In [23]:
# using f-string: the variables are referenced directly, so no argument list is needed
print(f"Hello, {first_name} {last_name}. You are {age}. You are a {profession}. You are a member of {affiliation}.")

Hello, Donald Trump. You are 73. You are a President. You are a member of U.S. Goverment.


In [24]:
# let's compare their speed: %timeit (an IPython magic) times building the same sentence
# first with str.format(), then with an f-string
%timeit "Hello, {first_name} {last_name}. You are {age}. You are a {profession}. You are a member of {affiliation}.".format(first_name=first_name, last_name=last_name, age=age, profession=profession, affiliation=affiliation)
%timeit f"Hello, {first_name} {last_name}. You are {age}. You are a {profession}. You are a member of {affiliation}."

1.64 µs ± 69.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
402 ns ± 65.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


# Read a file

In [25]:
# to open and read a text file
f = open('dryland_corn_acres_state.csv') # the mode defaults to reading text when none is given

# method 1, for loop to read lines: iterating over the file yields one line at a time
for i, line in enumerate(f, start=1):
    print(line)   # each line keeps its trailing '\n', so print() leaves a blank line after it
    if i == 5:
        break

# Close this handle before moving on: a later cell opens the file again under
# the same name `f`, which would otherwise leave this handle open and unreachable.
f.close()

,Value,state_name,statisticcat_desc,state_alpha,source_desc,domain_desc,year

0,48205,ALABAMA,AREA HARVESTED,AL,CENSUS,IRRIGATION STATUS,2013

1,11215,ARKANSAS,AREA HARVESTED,AR,CENSUS,IRRIGATION STATUS,2013

2,115785,COLORADO,AREA HARVESTED,CO,CENSUS,IRRIGATION STATUS,2013

3,942,CONNECTICUT,AREA HARVESTED,CT,CENSUS,IRRIGATION STATUS,2013



In [26]:
# the raw print isn't nice, so define a helper that prints the fields in aligned columns
def print_line(line):
    """Print one comma-separated line as centered, fixed-width columns.

    Each field is centered in an 11-character column and truncated to at
    most 10 characters, so at least one space separates adjacent columns.

    Parameters
    ----------
    line : str
        A single line of comma-separated text; a trailing newline is allowed.
    """
    # Strip the line terminator first. Left in place, it stays inside the last
    # field, and the padding that follows it is printed as an extra
    # whitespace-only line after every row.
    items = line.rstrip('\r\n').split(',')
    print(('{:^11.10}' * len(items)).format(*items))
    

In [27]:
# open the text file again; reading starts from the beginning of the file
f = open('dryland_corn_acres_state.csv') # the mode defaults to reading when none is given

# method 1, for loop to read lines, this time printed with print_line
i = 0
for line in f:
    i += 1
    print_line(line)
    if i == 5:
        break
# f is still open here: the following cells keep reading from it

              Value   state_name statisticc state_alph source_des domain_des    year
   
     0        48205     ALABAMA  AREA HARVE     AL       CENSUS   IRRIGATION    2013
   
     1        11215    ARKANSAS  AREA HARVE     AR       CENSUS   IRRIGATION    2013
   
     2       115785    COLORADO  AREA HARVE     CO       CENSUS   IRRIGATION    2013
   
     3         942    CONNECTICU AREA HARVE     CT       CENSUS   IRRIGATION    2013
   


In [28]:
# method 2, for loop with range: each readline() call returns the next unread line of f,
# so this continues after the lines already consumed
for i in range(5):
    line = f.readline()
    print_line(line)

     4        53257    DELAWARE  AREA HARVE     DE       CENSUS   IRRIGATION    2013
   
     5        10054     FLORIDA  AREA HARVE     FL       CENSUS   IRRIGATION    2013
   
     6        33357     GEORGIA  AREA HARVE     GA       CENSUS   IRRIGATION    2013
   
     7       531844    ILLINOIS  AREA HARVE     IL       CENSUS   IRRIGATION    2013
   
     8       549086     INDIANA  AREA HARVE     IN       CENSUS   IRRIGATION    2013
   


In [29]:
# method 3, read all the remaining lines of f into a list at once
lines = f.readlines()

print(f'Number of lines is {len(lines)}')

# show only the first five of them
for line in lines[:5]:
    print_line(line)


Number of lines is 417
     9       227804      IOWA    AREA HARVE     IA       CENSUS   IRRIGATION    2013
   
    10       775512     KANSAS   AREA HARVE     KS       CENSUS   IRRIGATION    2013
   
    11       112180    KENTUCKY  AREA HARVE     KY       CENSUS   IRRIGATION    2013
   
    12        82984    LOUISIANA AREA HARVE     LA       CENSUS   IRRIGATION    2013
   
    13                   MAINE   AREA HARVE     ME       CENSUS   IRRIGATION    2013
   


In [30]:
# close the file once you are done with it; a handle opened with a bare open() stays open until then
f.close()

In [31]:
# it is easy to forget to close files that have been opened;
# a with statement closes the file automatically when its block ends
with open('dryland_corn_acres_state.csv') as f:
    for i in range(5):
        line = f.readline()
        print(line)

,Value,state_name,statisticcat_desc,state_alpha,source_desc,domain_desc,year

0,48205,ALABAMA,AREA HARVESTED,AL,CENSUS,IRRIGATION STATUS,2013

1,11215,ARKANSAS,AREA HARVESTED,AR,CENSUS,IRRIGATION STATUS,2013

2,115785,COLORADO,AREA HARVESTED,CO,CENSUS,IRRIGATION STATUS,2013

3,942,CONNECTICUT,AREA HARVESTED,CT,CENSUS,IRRIGATION STATUS,2013



In [32]:
# f was closed when the with block ended, so reading from it raises ValueError
f.read()

ValueError: I/O operation on closed file.

In [33]:
# search Nebraska's dryland corn acres (lines that mention both NEBRASKA and SURVEY)
with open('dryland_corn_acres_state.csv') as f:
    for line in f:
        # the header line begins with a comma because its first column name is empty
        if line.startswith(','):
            headers = line.rstrip().split(',')
            # remember where the columns of interest sit in every data row
            istate_name = headers.index('state_name')
            ivalue = headers.index('Value')
            iyear = headers.index('year')
            continue
        # skip every line that is not a Nebraska survey record
        if 'NEBRASKA' not in line or 'SURVEY' not in line:
            continue
        line_data = line.rstrip().split(',')
        print(f'{line_data[istate_name]} grew {line_data[ivalue]} acres of dryland Corn in {line_data[iyear]}.')

NEBRASKA grew 3997000 acres of dryland Corn in 2018.
NEBRASKA grew 4022000 acres of dryland Corn in 2017.
NEBRASKA grew 3973000 acres of dryland Corn in 2016.
NEBRASKA grew 3783000 acres of dryland Corn in 2015.
NEBRASKA grew 3697000 acres of dryland Corn in 2014.
NEBRASKA grew 3835000 acres of dryland Corn in 2013.
NEBRASKA grew 3325000 acres of dryland Corn in 2012.
NEBRASKA grew 4000000 acres of dryland Corn in 2011.
NEBRASKA grew 3610000 acres of dryland Corn in 2010.
NEBRASKA grew 3560000 acres of dryland Corn in 2009.
NEBRASKA grew 3335000 acres of dryland Corn in 2008.
NEBRASKA grew 3468000 acres of dryland Corn in 2007.
NEBRASKA grew 3035000 acres of dryland Corn in 2006.
NEBRASKA grew 3325000 acres of dryland Corn in 2005.
NEBRASKA grew 3065000 acres of dryland Corn in 2004.
NEBRASKA grew 2935000 acres of dryland Corn in 2003.
NEBRASKA grew 2695000 acres of dryland Corn in 2002.
NEBRASKA grew 3200000 acres of dryland Corn in 2001.
NEBRASKA grew 3250000 acres of dryland Corn in

# Write a file

In [34]:
# collect the year and acreage of every Nebraska line into two parallel lists
# NOTE(review): unlike the search that also requires 'SURVEY', this keeps every line
# mentioning NEBRASKA, so a year can occur more than once (2013 does in the
# recorded output) - confirm that this is intended
years = []
acres = []
with open('dryland_corn_acres_state.csv') as f:
    for line in f:
        # the header line begins with a comma because its first column name is empty
        if line.startswith(','):
            headers = line.rstrip().split(',')
            istate_name = headers.index('state_name')
            ivalue = headers.index('Value')
            iyear = headers.index('year')
            continue
        if 'NEBRASKA' not in line:
            continue
        line_data = line.rstrip().split(',')
        acres.append(line_data[ivalue])
        years.append(line_data[iyear])

print(years[:5], acres[:5])

['2013', '2018', '2017', '2016', '2015'] ['1442994', '3997000', '4022000', '3973000', '3783000']


In [35]:
# write the collected years and acres to a new text file, one "year,acres" row per pair;
# mode 'w' creates the file or overwrites an existing one
with open('nebraska_corn_acres.csv', 'w') as f:
    f.write('year, acres\n')
    # zip walks the two parallel lists together instead of indexing both by position
    for year, acre in zip(years, acres):
        f.write(f'{year},{acre}\n')

OSError: [Errno 22] Invalid argument

In [36]:
# writing one file while reading another: a single with statement manages both handles
with open('dryland_corn_acres_state.csv') as fr, open('nebraska_corn_acres.csv', 'w') as fw:
    fw.write('year, acres\n') # first line
    for line in fr:
        # the header line begins with a comma because its first column name is empty
        if line.startswith(','):
            headers = line.rstrip().split(',')
            istate_name = headers.index('state_name')
            ivalue = headers.index('Value')
            iyear = headers.index('year')
            continue
        # only lines that mention NEBRASKA are copied to the output
        if 'NEBRASKA' not in line:
            continue
        line_data = line.rstrip().split(',')
        fw.write(f'{line_data[iyear]},{line_data[ivalue]}\n')
            

# IPython shell escape: show the first 10 lines of the file that was just written
! head -10 nebraska_corn_acres.csv

year, acres
2013,1442994
2018,3997000
2017,4022000
2016,3973000
2015,3783000
2014,3697000
2013,3835000
2012,3325000
2011,4000000


In [37]:
# write the data into binary file for each state
import struct
from contextlib import ExitStack

states = {}
# Every per-state output file is registered with the ExitStack, which closes all
# of them when the block ends - also when a row fails to parse part-way through.
# A separate closing loop after the read would be skipped by such an exception.
with ExitStack() as stack, open('dryland_corn_acres_state.csv') as fr:
    for line in fr:
        if line.startswith(','):
            # this is the first line
            headers = line.rstrip().split(',')
            istate_name = headers.index('state_name')
            ivalue = headers.index('Value')
            iyear = headers.index('year')
        elif 'SURVEY' in line:
            line_data = line.rstrip().split(',')
            s = line_data[istate_name]
            if s not in states:
                # first record for this state: create its file and keep the handle for later rows
                states[s] = stack.enter_context(open(f'{s.strip()}.bin', 'wb'))
            # one record = year as unsigned short ('H') + acres as 32-bit float ('f')
            states[s].write(struct.pack('Hf', int(line_data[iyear]), float(line_data[ivalue])))

# all files are closed here; `states` still holds one key per state name
                  
    

In [38]:
# read back the first three (year, acres) records of every state file
record_size = struct.calcsize('Hf')   # bytes per record; must match the format used for writing
for s in states:
    with open(f'{s.strip()}.bin', 'rb') as f:
        for i in range(3):
            dat = f.read(record_size)
            # a file holding fewer than three records yields a short (or empty)
            # read; stop instead of letting struct.unpack raise on it
            if len(dat) < record_size:
                break
            print(s, struct.unpack('Hf', dat))

COLORADO (2018, 615000.0)
COLORADO (2017, 634000.0)
COLORADO (2016, 500000.0)
DELAWARE (2018, 88000.0)
DELAWARE (2017, 91000.0)
DELAWARE (2016, 89000.0)
KANSAS (2018, 3573000.0)
KANSAS (2017, 3778000.0)
KANSAS (2016, 3370000.0)
MONTANA (1967, 900.0)
MONTANA (1966, 700.0)
MONTANA (1965, 200.0)
NEBRASKA (2018, 3997000.0)
NEBRASKA (2017, 4022000.0)
NEBRASKA (2016, 3973000.0)
NEW MEXICO (1995, 600.0)
NEW MEXICO (1994, 2200.0)
NEW MEXICO (1993, 1650.0)
NORTH DAKOTA (2009, 1750000.0)
NORTH DAKOTA (2008, 2199000.0)
NORTH DAKOTA (2007, 2243100.0)
OKLAHOMA (2009, 330000.0)
OKLAHOMA (2008, 170000.0)
OKLAHOMA (2007, 130000.0)
SOUTH DAKOTA (2009, 4476000.0)
SOUTH DAKOTA (2008, 4198000.0)
SOUTH DAKOTA (2007, 4279000.0)
TEXAS (2018, 1000000.0)
TEXAS (2017, 1285000.0)
TEXAS (2016, 1350000.0)
WYOMING (2008, 5000.0)
WYOMING (2007, 2500.0)
WYOMING (2006, 2500.0)
