## Working With Strings In DataFrame

In [1]:
import numpy as np
import pandas as pd

In [2]:
inspections = pd.read_csv('chicago_food_inspections.csv')

In [3]:
inspections

Unnamed: 0,Name,Risk
0,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
1,JETS PIZZA,Risk 2 (Medium)
2,ROOM 1520,Risk 3 (Low)
3,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
4,CHARTWELLS,Risk 1 (High)
...,...,...
153805,WOLCOTT'S,Risk 1 (High)
153806,DUNKIN DONUTS/BASKIN-ROBBINS,Risk 2 (Medium)
153807,Cafe 608,Risk 1 (High)
153808,mr.daniel's,Risk 1 (High)


In [4]:
inspections['Name']

0                 MARRIOT MARQUIS CHICAGO   
1                                JETS PIZZA 
2                                 ROOM 1520 
3                  MARRIOT MARQUIS CHICAGO  
4                              CHARTWELLS   
                         ...                
153805                           WOLCOTT'S  
153806       DUNKIN DONUTS/BASKIN-ROBBINS   
153807                             Cafe 608 
153808                          mr.daniel's 
153809                           TEMPO CAFE 
Name: Name, Length: 153810, dtype: object

In [5]:
inspections['Name'].values

# Each value contains unnecessary leading and trailing spaces

array([' MARRIOT MARQUIS CHICAGO   ', ' JETS PIZZA ', '   ROOM 1520 ',
       ..., ' Cafe 608 ', "  mr.daniel's ", '   TEMPO CAFE '],
      dtype=object)

## Applying String Methods To DataFrame 
- The `.str` accessor is used to access the string methods of a Series

In [6]:
inspections['Name'].str

<pandas.core.strings.accessor.StringMethods at 0x1f378fc5250>

In [7]:
inspections["Name"].str.upper()

0                 MARRIOT MARQUIS CHICAGO   
1                                JETS PIZZA 
2                                 ROOM 1520 
3                  MARRIOT MARQUIS CHICAGO  
4                              CHARTWELLS   
                         ...                
153805                           WOLCOTT'S  
153806       DUNKIN DONUTS/BASKIN-ROBBINS   
153807                             CAFE 608 
153808                          MR.DANIEL'S 
153809                           TEMPO CAFE 
Name: Name, Length: 153810, dtype: object

In [8]:
inspections["Name"].str.lower()

0                 marriot marquis chicago   
1                                jets pizza 
2                                 room 1520 
3                  marriot marquis chicago  
4                              chartwells   
                         ...                
153805                           wolcott's  
153806       dunkin donuts/baskin-robbins   
153807                             cafe 608 
153808                          mr.daniel's 
153809                           tempo cafe 
Name: Name, Length: 153810, dtype: object

In [9]:
inspections["Name"].str.title()

0                 Marriot Marquis Chicago   
1                                Jets Pizza 
2                                 Room 1520 
3                  Marriot Marquis Chicago  
4                              Chartwells   
                         ...                
153805                           Wolcott'S  
153806       Dunkin Donuts/Baskin-Robbins   
153807                             Cafe 608 
153808                          Mr.Daniel'S 
153809                           Tempo Cafe 
Name: Name, Length: 153810, dtype: object

In [10]:
# capitalize() upper-cases only the first character and lower-cases the rest.
# Each value here starts with a space, so no letter gets capitalized and only
# the lower-casing is visible in the result.
inspections["Name"].str.capitalize()

0                 marriot marquis chicago   
1                                jets pizza 
2                                 room 1520 
3                  marriot marquis chicago  
4                              chartwells   
                         ...                
153805                           wolcott's  
153806       dunkin donuts/baskin-robbins   
153807                             cafe 608 
153808                          mr.daniel's 
153809                           tempo cafe 
Name: Name, Length: 153810, dtype: object

## Using strip() to remove leading and trailing spaces

In [11]:
inspections.columns

Index(['Name', 'Risk'], dtype='object')

In [12]:
# Trim leading/trailing whitespace from every column (Name and Risk).
# Iterating over a DataFrame yields its column labels, so each column is
# visited exactly once.
for column in inspections:
    inspections[column] = inspections[column].str.strip()


# Written out column by column, the loop above is equivalent to:

# inspections["Name"] = inspections["Name"].str.strip()
# inspections["Risk"] = inspections["Risk"].str.strip()

In [13]:
inspections['Name']

0              MARRIOT MARQUIS CHICAGO
1                           JETS PIZZA
2                            ROOM 1520
3              MARRIOT MARQUIS CHICAGO
4                           CHARTWELLS
                      ...             
153805                       WOLCOTT'S
153806    DUNKIN DONUTS/BASKIN-ROBBINS
153807                        Cafe 608
153808                     mr.daniel's
153809                      TEMPO CAFE
Name: Name, Length: 153810, dtype: object

In [14]:
inspections

Unnamed: 0,Name,Risk
0,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
1,JETS PIZZA,Risk 2 (Medium)
2,ROOM 1520,Risk 3 (Low)
3,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
4,CHARTWELLS,Risk 1 (High)
...,...,...
153805,WOLCOTT'S,Risk 1 (High)
153806,DUNKIN DONUTS/BASKIN-ROBBINS,Risk 2 (Medium)
153807,Cafe 608,Risk 1 (High)
153808,mr.daniel's,Risk 1 (High)


In [15]:
inspections['Name'].values

array(['MARRIOT MARQUIS CHICAGO', 'JETS PIZZA', 'ROOM 1520', ...,
       'Cafe 608', "mr.daniel's", 'TEMPO CAFE'], dtype=object)

In [16]:
inspections['Risk'].unique()

array(['Risk 1 (High)', 'Risk 2 (Medium)', 'Risk 3 (Low)', 'All', nan],
      dtype=object)

In [17]:
# Drop the rows whose Risk is missing (the nan seen among the unique values);
# this takes the frame from 153810 rows down to 153744.
inspections = inspections.dropna(subset=['Risk'])

In [18]:
inspections

Unnamed: 0,Name,Risk
0,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
1,JETS PIZZA,Risk 2 (Medium)
2,ROOM 1520,Risk 3 (Low)
3,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
4,CHARTWELLS,Risk 1 (High)
...,...,...
153805,WOLCOTT'S,Risk 1 (High)
153806,DUNKIN DONUTS/BASKIN-ROBBINS,Risk 2 (Medium)
153807,Cafe 608,Risk 1 (High)
153808,mr.daniel's,Risk 1 (High)


## Replacing the 'All' risk value with 'Risk 4 (Extreme)'

In [19]:
inspections['Risk'].unique()

array(['Risk 1 (High)', 'Risk 2 (Medium)', 'Risk 3 (Low)', 'All'],
      dtype=object)

In [20]:
# 'All' is the only Risk label that does not follow the "Risk N (Level)"
# pattern, so relabel it as 'Risk 4 (Extreme)'.
# The replacement is limited to the Risk column: a frame-wide replace would
# also rewrite any establishment whose Name is exactly 'All'.
inspections = inspections.assign(
    Risk=inspections['Risk'].replace(to_replace='All', value='Risk 4 (Extreme)')
)

inspections

Unnamed: 0,Name,Risk
0,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
1,JETS PIZZA,Risk 2 (Medium)
2,ROOM 1520,Risk 3 (Low)
3,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
4,CHARTWELLS,Risk 1 (High)
...,...,...
153805,WOLCOTT'S,Risk 1 (High)
153806,DUNKIN DONUTS/BASKIN-ROBBINS,Risk 2 (Medium)
153807,Cafe 608,Risk 1 (High)
153808,mr.daniel's,Risk 1 (High)


In [21]:
inspections['Risk'].unique()

array(['Risk 1 (High)', 'Risk 2 (Medium)', 'Risk 3 (Low)',
       'Risk 4 (Extreme)'], dtype=object)

In [22]:
# Two equivalent ways to pull out the single character at position 5 of each
# label (the risk number): bracket slicing on .str, and .str.slice().
# Only the last expression in the cell is displayed.

inspections['Risk'].str[5:6]

inspections['Risk'].str.slice(5,6)

0         1
1         2
2         3
3         1
4         1
         ..
153805    1
153806    2
153807    1
153808    1
153809    1
Name: Risk, Length: 153744, dtype: object

## Displaying Only Wanted Text ---- Slicing 

In [23]:
# Every label has the form "Risk N (Level)": the level name starts at
# position 8 and the final character is the closing parenthesis, so [8:-1]
# keeps just "High" / "Medium" / "Low" / "Extreme".
# Run this cell only once - slicing the already-shortened values again would
# leave empty strings.
inspections["Risk"] = inspections["Risk"].str[8:-1]
inspections

Unnamed: 0,Name,Risk
0,MARRIOT MARQUIS CHICAGO,High
1,JETS PIZZA,Medium
2,ROOM 1520,Low
3,MARRIOT MARQUIS CHICAGO,High
4,CHARTWELLS,High
...,...,...
153805,WOLCOTT'S,High
153806,DUNKIN DONUTS/BASKIN-ROBBINS,Medium
153807,Cafe 608,High
153808,mr.daniel's,High


In [24]:
inspections['Risk']

0           High
1         Medium
2            Low
3           High
4           High
           ...  
153805      High
153806    Medium
153807      High
153808      High
153809      High
Name: Risk, Length: 153744, dtype: object

## Finding Particular Value In Strings

In [25]:
# Lower-case the names first so the match ignores case, then flag every
# name that contains 'tacos' anywhere.
has_tacos = (
    inspections['Name']
    .str.lower()
    .str.contains('tacos')
)

inspections.loc[has_tacos]

Unnamed: 0,Name,Risk
42,MANNY'S TACOS & BURRITOS,High
69,TACOS NIETOS,High
93,MANNY'S TACOS & BURRITOS,High
256,"ZACATACOS, II. INC",High
285,PACO'S TACOS 2,High
...,...,...
153284,"JESSE TACOS RESTAURANT, INC.",High
153472,DANNYS TACOS #2,High
153504,MIS TACOS,High
153540,markos tacos&cafe,High


In [26]:
# Same case-insensitive approach, but keep only names that begin with 'tacos'.
start_with_tacos = (
    inspections['Name']
    .str.lower()
    .str.startswith('tacos')
)

inspections.loc[start_with_tacos]

Unnamed: 0,Name,Risk
69,TACOS NIETOS,High
556,TACOS EL TIO 2 INC.,High
675,TACOS DON GABINO,High
958,TACOS EL TIO 2 INC.,High
1036,TACOS EL TIO 2 INC.,High
...,...,...
143587,TACOS DE LUNA,High
144026,TACOS GARCIA,High
146174,Tacos Place's 1,High
147810,TACOS MARIO'S LIMITED,High


In [27]:
# Same case-insensitive approach, but keep only names that finish with 'tacos'.
end_with_tacos = (
    inspections['Name']
    .str.lower()
    .str.endswith('tacos')
)

inspections.loc[end_with_tacos]

Unnamed: 0,Name,Risk
382,LAZO'S TACOS,High
569,LAZO'S TACOS,High
2652,FLYING TACOS,Low
3250,JONY'S TACOS,High
3812,PACO'S TACOS,High
...,...,...
151121,REYES TACOS,High
151318,EL MACHO TACOS,High
151801,EL MACHO TACOS,High
153087,RAYMOND'S TACOS,High


## Splitting Strings
- `customers['Name'].str.split(pat=' ')` — `pat` is the delimiter to split on (for example `','`); if it is omitted, the string is split on whitespace.

In [28]:
customers = pd.read_csv('customers.csv')

customers

Unnamed: 0,Name,Address
0,Frank Manning,"6461 Quinn Groves, East Matthew, New Hampshire..."
1,Elizabeth Johnson,"1360 Tracey Ports Apt. 419, Kyleport, Vermont,..."
2,Donald Stephens,"19120 Fleming Manors, Prestonstad, Montana, 23495"
3,Michael Vincent III,"441 Olivia Creek, Jimmymouth, Georgia, 82991"
4,Jasmine Zamora,"4246 Chelsey Ford Apt. 310, Karamouth, Utah, 7..."
...,...,...
9956,Dana Browning,"762 Andrew Views Apt. 254, North Paul, New Mex..."
9957,Amanda Anderson,"44188 Day Crest Apt. 901, Lake Marcia, Maine, ..."
9958,Eric Davis,"73015 Michelle Squares, Watsonville, West Virg..."
9959,Taylor Hernandez,"129 Keith Greens, Haleyfurt, Oklahoma, 98916"


In [29]:
# Three ways to split each name into words; only the last expression in the
# cell is displayed.
# With no pattern, split() breaks on whitespace; the other two pass a single
# space explicitly (positionally, then by keyword).
customers['Name'].str.split()
customers['Name'].str.split(' ')
customers['Name'].str.split(pat = ' ')

0              [Frank, Manning]
1          [Elizabeth, Johnson]
2            [Donald, Stephens]
3       [Michael, Vincent, III]
4             [Jasmine, Zamora]
                 ...           
9956           [Dana, Browning]
9957         [Amanda, Anderson]
9958              [Eric, Davis]
9959        [Taylor, Hernandez]
9960        [Sherry, Nicholson]
Name: Name, Length: 9961, dtype: object

In [30]:
# Count the customers by the number of parts in their name

customers['Name'].str.split().str.len().value_counts()

Name
2    9734
3     227
Name: count, dtype: int64

In [31]:
# n=1 limits the split to the first space, so every name has at most two
# parts (first word, everything else) - e.g. "Vincent III" stays together
customers['Name'].str.split(pat = ' ' , n=1)

0             [Frank, Manning]
1         [Elizabeth, Johnson]
2           [Donald, Stephens]
3       [Michael, Vincent III]
4            [Jasmine, Zamora]
                 ...          
9956          [Dana, Browning]
9957        [Amanda, Anderson]
9958             [Eric, Davis]
9959       [Taylor, Hernandez]
9960       [Sherry, Nicholson]
Name: Name, Length: 9961, dtype: object

In [32]:

customers['Name'].str.split(pat = ' ' , n=1).str.len().value_counts()

Name
2    9961
Name: count, dtype: int64

In [33]:
customers['Name'].str.split(pat = ' ' , n=1 , expand=True)

Unnamed: 0,0,1
0,Frank,Manning
1,Elizabeth,Johnson
2,Donald,Stephens
3,Michael,Vincent III
4,Jasmine,Zamora
...,...,...
9956,Dana,Browning
9957,Amanda,Anderson
9958,Eric,Davis
9959,Taylor,Hernandez


In [34]:
# expand=True returns the two name parts as separate columns, which are
# stored on the frame as 'First Name' and 'Last Name'.
customers[['First Name', 'Last Name']] = customers['Name'].str.split(
    pat=' ', n=1, expand=True
)

customers.loc[:, ['First Name', 'Last Name']]

Unnamed: 0,First Name,Last Name
0,Frank,Manning
1,Elizabeth,Johnson
2,Donald,Stephens
3,Michael,Vincent III
4,Jasmine,Zamora
...,...,...
9956,Dana,Browning
9957,Amanda,Anderson
9958,Eric,Davis
9959,Taylor,Hernandez


In [35]:
customers

Unnamed: 0,Name,Address,First Name,Last Name
0,Frank Manning,"6461 Quinn Groves, East Matthew, New Hampshire...",Frank,Manning
1,Elizabeth Johnson,"1360 Tracey Ports Apt. 419, Kyleport, Vermont,...",Elizabeth,Johnson
2,Donald Stephens,"19120 Fleming Manors, Prestonstad, Montana, 23495",Donald,Stephens
3,Michael Vincent III,"441 Olivia Creek, Jimmymouth, Georgia, 82991",Michael,Vincent III
4,Jasmine Zamora,"4246 Chelsey Ford Apt. 310, Karamouth, Utah, 7...",Jasmine,Zamora
...,...,...,...,...
9956,Dana Browning,"762 Andrew Views Apt. 254, North Paul, New Mex...",Dana,Browning
9957,Amanda Anderson,"44188 Day Crest Apt. 901, Lake Marcia, Maine, ...",Amanda,Anderson
9958,Eric Davis,"73015 Michelle Squares, Watsonville, West Virg...",Eric,Davis
9959,Taylor Hernandez,"129 Keith Greens, Haleyfurt, Oklahoma, 98916",Taylor,Hernandez


In [36]:
# customers.drop(labels = 'Name' , axis = 1)


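# NOTE: intentionally left commented out. drop() returns a new DataFrame, so
# its result would have to be assigned back to `customers` to take effect;
# the 'Name' column is still present in the outputs that follow.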

In [37]:
customers

Unnamed: 0,Name,Address,First Name,Last Name
0,Frank Manning,"6461 Quinn Groves, East Matthew, New Hampshire...",Frank,Manning
1,Elizabeth Johnson,"1360 Tracey Ports Apt. 419, Kyleport, Vermont,...",Elizabeth,Johnson
2,Donald Stephens,"19120 Fleming Manors, Prestonstad, Montana, 23495",Donald,Stephens
3,Michael Vincent III,"441 Olivia Creek, Jimmymouth, Georgia, 82991",Michael,Vincent III
4,Jasmine Zamora,"4246 Chelsey Ford Apt. 310, Karamouth, Utah, 7...",Jasmine,Zamora
...,...,...,...,...
9956,Dana Browning,"762 Andrew Views Apt. 254, North Paul, New Mex...",Dana,Browning
9957,Amanda Anderson,"44188 Day Crest Apt. 901, Lake Marcia, Maine, ...",Amanda,Anderson
9958,Eric Davis,"73015 Michelle Squares, Watsonville, West Virg...",Eric,Davis
9959,Taylor Hernandez,"129 Keith Greens, Haleyfurt, Oklahoma, 98916",Taylor,Hernandez


In [38]:
# Each address is "street, city, state, zip"; splitting on ", " with
# expand=True gives one column per part.
# Zip stays a string, which keeps leading zeros such as "03933".
customers[["Street", "City", "State", "Zip"]] = customers["Address"].str.split(
    pat=", ", expand=True
)
customers

Unnamed: 0,Name,Address,First Name,Last Name,Street,City,State,Zip
0,Frank Manning,"6461 Quinn Groves, East Matthew, New Hampshire...",Frank,Manning,6461 Quinn Groves,East Matthew,New Hampshire,16656
1,Elizabeth Johnson,"1360 Tracey Ports Apt. 419, Kyleport, Vermont,...",Elizabeth,Johnson,1360 Tracey Ports Apt. 419,Kyleport,Vermont,31924
2,Donald Stephens,"19120 Fleming Manors, Prestonstad, Montana, 23495",Donald,Stephens,19120 Fleming Manors,Prestonstad,Montana,23495
3,Michael Vincent III,"441 Olivia Creek, Jimmymouth, Georgia, 82991",Michael,Vincent III,441 Olivia Creek,Jimmymouth,Georgia,82991
4,Jasmine Zamora,"4246 Chelsey Ford Apt. 310, Karamouth, Utah, 7...",Jasmine,Zamora,4246 Chelsey Ford Apt. 310,Karamouth,Utah,76252
...,...,...,...,...,...,...,...,...
9956,Dana Browning,"762 Andrew Views Apt. 254, North Paul, New Mex...",Dana,Browning,762 Andrew Views Apt. 254,North Paul,New Mexico,28889
9957,Amanda Anderson,"44188 Day Crest Apt. 901, Lake Marcia, Maine, ...",Amanda,Anderson,44188 Day Crest Apt. 901,Lake Marcia,Maine,37378
9958,Eric Davis,"73015 Michelle Squares, Watsonville, West Virg...",Eric,Davis,73015 Michelle Squares,Watsonville,West Virginia,03933
9959,Taylor Hernandez,"129 Keith Greens, Haleyfurt, Oklahoma, 98916",Taylor,Hernandez,129 Keith Greens,Haleyfurt,Oklahoma,98916


In [39]:
# The address now lives in its own Street/City/State/Zip columns, so the
# combined column is no longer needed.
customers = customers.drop(columns='Address')

In [40]:
customers

Unnamed: 0,Name,First Name,Last Name,Street,City,State,Zip
0,Frank Manning,Frank,Manning,6461 Quinn Groves,East Matthew,New Hampshire,16656
1,Elizabeth Johnson,Elizabeth,Johnson,1360 Tracey Ports Apt. 419,Kyleport,Vermont,31924
2,Donald Stephens,Donald,Stephens,19120 Fleming Manors,Prestonstad,Montana,23495
3,Michael Vincent III,Michael,Vincent III,441 Olivia Creek,Jimmymouth,Georgia,82991
4,Jasmine Zamora,Jasmine,Zamora,4246 Chelsey Ford Apt. 310,Karamouth,Utah,76252
...,...,...,...,...,...,...,...
9956,Dana Browning,Dana,Browning,762 Andrew Views Apt. 254,North Paul,New Mexico,28889
9957,Amanda Anderson,Amanda,Anderson,44188 Day Crest Apt. 901,Lake Marcia,Maine,37378
9958,Eric Davis,Eric,Davis,73015 Michelle Squares,Watsonville,West Virginia,03933
9959,Taylor Hernandez,Taylor,Hernandez,129 Keith Greens,Haleyfurt,Oklahoma,98916
