# Strings

Fall 2024 NLP

Objective: practice working with strings (Python's `str` type)

Make sure to copy the Colab file to your directory (otherwise your changes will not be saved!)

In [1]:
# Bind a string object to the name new_string
new_string = "This is a String"

# Inspect the three things every Python object has: identity, type, value
print(f'ID: {id(new_string)}')      # identity (in CPython, the memory address)
print(f'Type: {type(new_string)}')  # the object's class
print(f'Value: {new_string}')       # the object's contents

ID: 138344871889840
Type: <class 'str'>
Value: This is a String


In [2]:
# simple string: single and double quotes both create str objects; the second
# literal uses double quotes so the apostrophe in I'm needs no escaping
simple_string = 'Hello!' + " I'm a simple string"
print(simple_string)

Hello! I'm a simple string


In [3]:
# multi-line string: each line break inside the triple quotes is stored as a \n (newline) character
multi_line_string = """Hello I'm
a multi-line
string!"""

multi_line_string

"Hello I'm\na multi-line\nstring!"

In [4]:
print(multi_line_string)

Hello I'm
a multi-line
string!


In [5]:
# Normal (non-raw) string: each backslash starts an escape sequence, so the
# path is silently corrupted -- \t becomes a tab, \n a newline, \f a form feed
escaped_string = "C:\the_folder\new_dir\file.txt"
print(escaped_string)  # not the intended path; will cause errors if we try to open a file here

C:	he_folder
ew_dirile.txt


In [6]:
# raw string: with the r prefix, backslashes are kept as literal characters
# instead of starting escape sequences, so the path survives unchanged
raw_string = r'C:\the_folder\new_dir\file.txt'
print(raw_string)

C:\the_folder\new_dir\file.txt


In [7]:
# unicode string literals: \uXXXX inserts the character whose code point is
# the given four hex digits (U+00E8 is the letter è)
string_with_unicode = 'H\u00e8llo!'
print(string_with_unicode)

Hèllo!


In [8]:
more_unicode = 'I love Pizza 🍕!  Shall we book a cab 🚕 to get pizza?'
print(more_unicode)

I love Pizza 🍕!  Shall we book a cab 🚕 to get pizza?


In [9]:
print(string_with_unicode + '\n' + more_unicode) # \n is new line

Hèllo!
I love Pizza 🍕!  Shall we book a cab 🚕 to get pizza?


In [10]:
' '.join([string_with_unicode, more_unicode]) # joining two strings

'Hèllo! I love Pizza 🍕!  Shall we book a cab 🚕 to get pizza?'

In [11]:
more_unicode[::-1]  # reverses the string

'?azzip teg ot 🚕 bac a koob ew llahS  !🍕 azziP evol I'

# String operations


## Different ways of String concatenation


In [12]:
'Hello 😊' + ' and welcome ' + 'to Python 🐍!'

'Hello 😊 and welcome to Python 🐍!'

In [13]:
'Hello 😊' ' and welcome ' 'to Python 🐍!'

'Hello 😊 and welcome to Python 🐍!'

In [14]:
# concatenation of variables and literals

In [15]:
s1 = 'Python 💻!'
'Hello 😊 ' + s1

'Hello 😊 Python 💻!'

In [16]:
# This will produce an error! Implicit concatenation only works between string literals; with a variable you need the plus sign.
'Hello 😊 ' s1

SyntaxError: invalid syntax (ipython-input-519936396.py, line 2)

In [None]:
# some more ways of concatenating strings

In [None]:
# You can repeat a string by multiplying it by an integer!
s2 = '--🐍Python🐍--'
s2 * 5

In [None]:
# concatenating several strings together in parentheses

In [None]:
# checking for substrings in a string

In [None]:
'Python' in s1

In [None]:
'python' in s2

In [None]:
# computing total length of the string

In [None]:
len(s2)

# String indexing and slicing

In [None]:
# creating a string, then showing its value and its type on separate lines
s = 'PYTHON'
print(s, type(s), sep='\n')

## String indexing

In [None]:
# show each character of s next to its zero-based index
for index, character in enumerate(s):
    print(f'Character -> {character} has index-> {index}')

In [None]:
# print one letter at a time based on the index position
s[0], s[1], s[2], s[3], s[4], s[5]

In [None]:
# Can you guess what happens here?
s[-1], s[-2], s[-3], s[-4], s[-5], s[-6]

## String slicing

In [None]:
# everything
s[:]

In [None]:
# from index 1 up to (but not including) index 4
s[1:4]

In [None]:
# from the start up to (but not including) index 3, and from index 3 to the end
s[:3], s[3:]

# Useful String methods

## Case conversions

In [None]:
s = 'python is great'

In [None]:
s.capitalize()

In [None]:
s.upper()

In [None]:
s.title()

## String replace

In [None]:
s.replace('python', 'NLP')

## Numeric checks

In [None]:
'12345'.isdecimal()

In [None]:
'apollo11'.isdecimal()

## Alphabet checks

In [None]:
'python'.isalpha()

## Alphanumeric checks
(Letters and digits)

In [None]:
'total'.isalnum()

In [None]:
'abc123'.isalnum()

In [None]:
'1+1'.isalnum()

## String splitting and joining

In [None]:
# very useful!
s = 'I,am,a,comma,separated,string'
s.split(',')

In [None]:
' '.join(s.split(','))

In [None]:
# stripping whitespace characters
s = '   I am surrounded by spaces    '
s

In [None]:
# do you see any white space anymore at the beginning and at the end?
s.strip()

In [None]:
# split on the dot (period) character
sentences = 'Python is great. NLP is also good.'
sentences.split('.')

In [None]:
# split by dot and join with \n new line
print('\n'.join(sentences.split('.')))

In [None]:
# same split, but skip empty pieces and trim the surrounding whitespace
# from each sentence before printing one per line
print('\n'.join(piece.strip() for piece in sentences.split('.') if piece))

# String formatting

## Formatting strings using the format method - new style

In [None]:
# can you see what goes into each pair of curly brackets?
'Hello {} {}, it is a great {} to meet you at {}'.format('Mr.', 'Jones', 'pleasure', 5)

In [None]:
'Hello {} {}, it is a great {} to meet you at {} o\' clock'.format('Sir', 'Arthur', 'honor', 9)