### Loading the Dataset 

In [1]:
# Loading the needed packages 
import pandas as pd 
import numpy as np

df = pd.read_csv('BL-Flickr-Images-Book-truncated50.csv')
df.head()

Unnamed: 0,Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks
0,206,,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 12641.b.30.
1,216,,London; Virtue & Yorston,1868,Virtue & Co.,All for Greed. [A novel. The dedication signed...,"A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 12626.cc.2.
2,218,,Londinium,1869,"Bradbury, Evans & Co.",Love the Avenger. By the author of “All for Gr...,"A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 12625.dd.1.
3,472,,\u004c\u006f\u006e\u0064\u006f\u006e,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the...","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 10369.bbb.15.
4,480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it...","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 9007.d.28.


### Dropping columns

In [2]:
to_drop = ['Edition Statement','Corporate Author','Corporate Contributors','Former owner','Engraver','Contributors','Issuance type','Shelfmarks', 'Flickr URL']
df.drop(to_drop, inplace=True, axis=1)
df.head()

Unnamed: 0,Identifier,Place of Publication,Date of Publication,Publisher,Title,Author
0,206,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.
1,216,London; Virtue & Yorston,1868,Virtue & Co.,All for Greed. [A novel. The dedication signed...,"A., A. A."
2,218,Londinium,1869,"Bradbury, Evans & Co.",Love the Avenger. By the author of “All for Gr...,"A., A. A."
3,472,\u004c\u006f\u006e\u0064\u006f\u006e,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the...","A., E. S."
4,480,London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it...","A., E. S."


In [3]:
# Set Index as Identifier 
df = df.set_index('Identifier')
df.head()

Unnamed: 0_level_0,Place of Publication,Date of Publication,Publisher,Title,Author
Identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
206,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.
216,London; Virtue & Yorston,1868,Virtue & Co.,All for Greed. [A novel. The dedication signed...,"A., A. A."
218,Londinium,1869,"Bradbury, Evans & Co.",Love the Avenger. By the author of “All for Gr...,"A., A. A."
472,\u004c\u006f\u006e\u0064\u006f\u006e,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the...","A., E. S."
480,London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it...","A., E. S."


In [4]:
# Looking at a specfic location 
df.loc[1905:, 'Date of Publication'].head(10)

Identifier
1905           1888
1929    1839, 38-54
2836           1897
2854           1865
2956        1860-63
2957           1873
3017           1866
3131           1899
4598           1814
4884           1820
Name: Date of Publication, dtype: object

In [5]:
# Removing characters from the date of publication
rem = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False)
rem.head()

Identifier
206    1879
216    1868
218    1869
472    1851
480    1857
Name: Date of Publication, dtype: object

In [6]:
df['Date of Publication'] = pd.to_numeric(rem)
df['Date of Publication'].dtype
df['Date of Publication'].isnull().sum() / len(df)

0.22448979591836735

In [7]:
# Looking at top 10 Place of Publications
df['Place of Publication'].head(10)

Identifier
206                                   London
216                 London; Virtue & Yorston
218                                Londinium
472     \u004c\u006f\u006e\u0064\u006f\u006e
480                                   London
481                                   London
519                                   LONDON
667      pp. 40. G. Bryan & Co: Oxford, 1898
874                                  London]
1143                               Λονδίνιον
Name: Place of Publication, dtype: object

In [8]:
# Keeping Rows that consist of only LONDON as a Place of Publication 
new = df[df['Place of Publication'].str.contains("London")]

In [9]:
# Printing the new dataframe (Head)
new.head()

Unnamed: 0_level_0,Place of Publication,Date of Publication,Publisher,Title,Author
Identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
206,London,1879.0,S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.
216,London; Virtue & Yorston,1868.0,Virtue & Co.,All for Greed. [A novel. The dedication signed...,"A., A. A."
480,London,1857.0,Wertheim & Macintosh,"[The World in which I live, and my place in it...","A., E. S."
481,London,1875.0,William Macintosh,"[The World in which I live, and my place in it...","A., E. S."
874,London],,,"A Warning to the inhabitants of England, and L...",Remaʿ.


In [10]:
# Printing the new dataframe (Tail)
new.tail()

Unnamed: 0_level_0,Place of Publication,Date of Publication,Publisher,Title,Author
Identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14466,London,,Adams & King,"The Historie of Eald Street, now called Old St...",
15141,London,1893.0,T. Fisher Unwin,The New Egypt. A social sketch. [Edited by J. ...,"ADAMS, Francis William Lauderdale."
15146,London,1894.0,T. Fisher Unwin,Tiberius: a drama ... With introduction by W. ...,"ADAMS, Francis William Lauderdale."
16543,London,1816.0,John Murray,"The Narrative of Robert Adams, a sailor, who w...","Adams, Robert - Sailor"
18173,London,1804.0,Vernor & Hood,[The Works of the late Right Honourable Joseph...,"ADDISON, Joseph - Right Hon"


In [11]:
# Load the clean data into a new csv file 
new.to_csv('New-BL-Flickr-Images-Book-truncated50.csv')