# Question 1: Replace Space, Comma, and Dot with Colon

In [61]:
import re

def replace_punctuation_with_colon(text):
    # Replace space, comma, and dot with a colon
    result = re.sub(r'[ ,.]', ':', text)
    return result

sample_text = 'Python Exercises, PHP exercises.'
output = replace_punctuation_with_colon(sample_text)
print(output)  

Python:Exercises::PHP:exercises:


# Question 2: Create DataFrame and Remove Special Characters

In [62]:
import pandas as pd
import re

data = {'SUMMARY': ['hello, world!', 'XXXXX test', '123four, five:; six...']}
df = pd.DataFrame(data)

# Remove special characters except words
df['SUMMARY'] = df['SUMMARY'].apply(lambda x: re.sub(r'[^a-zA-Z\s]', '', x))

print(df)


         SUMMARY
0    hello world
1     XXXXX test
2  four five six


# Question 3: Find Words with at Least 4 Characters

In [63]:
def find_long_words(text):
    pattern = re.compile(r'\b\w{4,}\b')
    return pattern.findall(text)

sample_text = "This is a sample text with some long words like Python, regex, and pandas."
long_words = find_long_words(sample_text)
print(long_words)


['This', 'sample', 'text', 'with', 'some', 'long', 'words', 'like', 'Python', 'regex', 'pandas']


# Question 4: Find Three, Four, and Five Character Words

In [64]:
def find_specific_length_words(text, length):
    pattern = re.compile(r'\b\w{%d}\b' % length)
    return pattern.findall(text)

sample_text = "This is a sample text with some short and long words."
three_char_words = find_specific_length_words(sample_text, 3)
four_char_words = find_specific_length_words(sample_text, 4)
five_char_words = find_specific_length_words(sample_text, 5)

print("Three-character words:", three_char_words)
print("Four-character words:", four_char_words)
print("Five-character words:", five_char_words)


Three-character words: ['and']
Four-character words: ['This', 'text', 'with', 'some', 'long']
Five-character words: ['short', 'words']


# Question 5: Remove Parentheses from Strings

In [65]:
import re

def remove_parentheses(strings):
    pattern = re.compile(r'\s*\([^)]*\)\s*')
    return [pattern.sub('', s) for s in strings]

sample_strings = ["example (.com)", "hr@fliprobo (.com)", "github (.com)",
                  "Hello (Data Science World)", "Data (Scientist)"]

cleaned_strings = remove_parentheses(sample_strings)
for s in cleaned_strings:
    print(s)


example
hr@fliprobo
github
Hello
Data


# Question 6- Write a python program to remove the parenthesis area from the text stored in the text file using Regular Expression.

In [66]:
import re

def remove_parentheses_from_file(file_path):
    try:
        # Read the content from the file
        with open(file_path, 'r') as file:
            text = file.read()

        # Remove content within parentheses using a regular expression
        cleaned_text = re.sub(r'\([^)]*\)', '', text)

        # Write the cleaned content back to the file
        with open(file_path, 'w') as file:
            file.write(cleaned_text)

        print(f"Removed content within parentheses from {file_path}.")
    except FileNotFoundError:
        print(f"File {file_path} not found.")

# Example usage
file_path = 'sample_text.txt' 
remove_parentheses_from_file(file_path)


Removed content within parentheses from sample_text.txt.


# Question 7: Split String into Uppercase Letters

In [67]:
def split_uppercase(text):
    pattern = re.compile(r'[A-Z][a-z]*')
    return pattern.findall(text)

sample_text = "ImportanceOfRegularExpressionsInPython"
uppercase_words = split_uppercase(sample_text)
print(uppercase_words)

['Importance', 'Of', 'Regular', 'Expressions', 'In', 'Python']


# Question 8: Insert Spaces between Words Starting with Numbers

In [68]:
def insert_spaces_numbers(text):
    pattern = re.compile(r'(?<=[0-9])(?=[A-Za-z])')
    return pattern.sub(' ', text)

sample_text = "RegularExpression1IsAn2ImportantTopic3InPython"
formatted_text = insert_spaces_numbers(sample_text)
print(formatted_text)

RegularExpression1 IsAn2 ImportantTopic3 InPython


# Question 9: Insert Spaces between Words Starting with Capital Letters or Numbers

In [69]:
def insert_spaces_capital(text):
    pattern = re.compile(r'(?<=[A-Z0-9])(?=[A-Z])')
    return pattern.sub(' ', text)

sample_text = "RegularExpression1IsAn2ImportantTopic3InPython"
formatted_text = insert_spaces_capital(sample_text)
print(formatted_text)
# Expected Output: RegularExpression 1 IsAn 2 Important

RegularExpression1 IsAn2 ImportantTopic3 InPython


# Question 10: Extract First 6 Letters of Each Country

In [70]:
import pandas as pd

# Replace 'path_to_file.csv' with the actual path to the downloaded file
df = pd.read_csv("happiness_score_dataset.csv")

# Extract the first 6 letters of each country
df["first_six_letters"] = df["Country"].str[:6]

# Display the resulting DataFrame
print(df[["Country", "first_six_letters"]])

         Country first_six_letters
0    Switzerland            Switze
1        Iceland            Icelan
2        Denmark            Denmar
3         Norway            Norway
4         Canada            Canada
..           ...               ...
153       Rwanda            Rwanda
154        Benin             Benin
155        Syria             Syria
156      Burundi            Burund
157         Togo              Togo

[158 rows x 2 columns]


# Question 11: Match String with Upper and Lowercase Letters, Numbers, and Underscores

In [71]:
import re

def validate_string(s):
    pattern = re.compile(r'^[a-zA-Z0-9_]+$')
    return bool(pattern.match(s))

sample_string = "Hello_World123"
print(validate_string(sample_string))


True


# Question 12: String Starting with a Specific Number

In [72]:
def starts_with_number(s, number):
    return s.startswith(str(number))

sample_string = "123abc"
print(starts_with_number(sample_string, 123))  


True


# Question 13: Remove Leading Zeros from an IP Address

In [73]:
def remove_leading_zeros(ip_address):
    components = ip_address.split(".")
    cleaned_components = [str(int(comp)) for comp in components]
    return ".".join(cleaned_components)

sample_ip = "192.010.001.100"
print(remove_leading_zeros(sample_ip))  # Expected Output: 192.10.1.100


192.10.1.100


# Question 14: Extract Date String in Desired Format

In [74]:
def extract_date(sample_text):
    pattern = re.compile(r'([A-Z][a-z]+ \d{1,2}(?:st|nd|rd|th) \d{4})')
    match = pattern.search(sample_text)
    if match:
        return match.group(1)
    else:
        return "Date not found"

sample_text = "On August 15th 1947 that India was declared independent..."
print(extract_date(sample_text))  


August 15th 1947


# Question 15: Search Literal Strings in a Text

In [75]:
def search_words(text, words):
    found_words = [word for word in words if word in text]
    return found_words

sample_text = "The quick brown fox jumps over the lazy dog."
searched_words = ["fox", "dog", "horse"]
print(search_words(sample_text, searched_words))


['fox', 'dog']


# Question 16: Search Literal String and Find Location

In [76]:
def find_word_location(text, word):
    match = re.search(word, text)
    if match:
        return match.start()
    else:
        return -1

sample_text = "The quick brown fox jumps over the lazy dog."
searched_word = "fox"
print(find_word_location(sample_text, searched_word))  


16


# Question 17: Find Substrings within a String

In [77]:
import re

def find_substrings(text, pattern):
    return re.findall(pattern, text)

sample_text = "Python exercises, PHP exercises, C# exercises"
pattern = r'\b\w+\s+exercises\b'
print(find_substrings(sample_text, pattern))


['Python exercises', 'PHP exercises']


# Question 18: Find Occurrence and Position of Substrings

In [78]:
import re

def find_occurrence_and_position(text, pattern):
    matches = [(match.group(), match.start()) for match in re.finditer(pattern, text)]
    return matches

sample_text = "Python exercises, PHP exercises, C# exercises"
pattern = r'\b\w+\s+exercises\b'
print(find_occurrence_and_position(sample_text, pattern))


[('Python exercises', 0), ('PHP exercises', 18)]


# Question 19: Convert Date Format from yyyy-mm-dd to dd-mm-yyyy

In [79]:
import datetime

def convert_date_format(date_str):
    # List of possible date formats to try
    date_formats = ("%Y-%m-%d", "%Y-%m", "%d/%m/%Y")
    
    for date_format in date_formats:
        try:
            if date_format == "%Y-%m" and len(date_str) == 7:
                # Add default day if input is in YYYY-MM format
                date_str += "-01"
            date_obj = datetime.datetime.strptime(date_str, date_format)
            # Format date in the desired output format
            formatted_date = date_obj.strftime("%d-%m-%Y")
            return formatted_date
        except ValueError:
            continue

    return "Invalid date format"

# Example usage
input_date = "05/05/2003"
output_date = convert_date_format(input_date)
print(f"Converted date: {output_date}")


Converted date: 05-05-2003


# Question 20: Find Decimal Numbers with Precision of 1 or 2

In [80]:
def find_decimal_numbers(text):
    pattern = re.compile(r'\b\d+\.\d{1,2}\b')
    return pattern.findall(text)

sample_text = "01.12 0132.123 2.31875 145.8 3.01 27.25 0.25"
decimal_numbers = find_decimal_numbers(sample_text)
print(decimal_numbers)


['01.12', '145.8', '3.01', '27.25', '0.25']


# Question 21: Separate and Print Numbers and Their Positions

In [81]:
def separate_numbers_with_positions(text):
    pattern = re.compile(r'\b\d+\b')
    matches = [(match.group(), match.start()) for match in re.finditer(pattern, text)]
    return matches

sample_text = "The price of 10 apples is 50, and 20 bananas cost 100."
number_positions = separate_numbers_with_positions(sample_text)
for number, position in number_positions:
    print(f"Number: {number}, Position: {position}")


Number: 10, Position: 13
Number: 50, Position: 26
Number: 20, Position: 34
Number: 100, Position: 50


# Question 22: Extract Maximum Numeric Value from a String

In [82]:
def extract_max_numeric_value(text):
    pattern = re.compile(r'\b\d+\b')
    numbers = [int(match.group()) for match in re.finditer(pattern, text)]
    if numbers:
        return max(numbers)
    else:
        return None

sample_text = "My marks in each semester are: 947, 896, 926, 524, 734, 950, 642"
max_value = extract_max_numeric_value(sample_text)
print(f"Maximum value: {max_value}")


Maximum value: 950


# Question 23: Insert Spaces between Words Starting with Capital Letters

In [83]:
def insert_spaces_capital(text):
    pattern = re.compile(r'(?<=[a-z])(?=[A-Z])')
    return pattern.sub(' ', text)

sample_text = "RegularExpressionIsAnImportantTopicInPython"
formatted_text = insert_spaces_capital(sample_text)
print(formatted_text)


Regular Expression Is An Important Topic In Python


# Question 24: Regex for Sequences of One Upper Case Letter Followed by Lower Case Letters

In [84]:
def find_uppercase_sequences(text):
    pattern = re.compile(r'[A-Z][a-z]+')
    return pattern.findall(text)

sample_text = "ThisIsAnExampleOfCamelCase"
sequences = find_uppercase_sequences(sample_text)
print(sequences)


['This', 'Is', 'An', 'Example', 'Of', 'Camel', 'Case']


# Question 25: Remove Continuous Duplicate Words from Sentence

In [85]:
import re

def remove_continuous_duplicates(text):
    pattern = re.compile(r'\b(\w+)(?:\W+\1\b)+', flags=re.IGNORECASE)
    return pattern.sub(r'\1', text)

sample_text = "Hello hello world world"
output = remove_continuous_duplicates(sample_text)
print(output)  # Expected Output: Hello hello world


Hello world


# Question 26: Accept String Ending with Alphanumeric Character

In [86]:
import re

def validate_ending_alphanumeric(s):
    pattern = re.compile(r'^.*[a-zA-Z0-9]$')
    return bool(pattern.match(s))

sample_string = "Hello123"
print(validate_ending_alphanumeric(sample_string))  # Expected Output: True


True


# Question 27: Extract Hashtags

In [87]:
def extract_hashtags(text):
    pattern = re.compile(r'#\w+')
    return pattern.findall(text)

sample_text = """RT @kapil_kausik: #Doltiwal I mean #xyzabc is "hurt" by #Demonetization as the same has rendered USELESS <ed><U+00A0><U+00BD><ed><U+00B1><U+0089> "acquired funds" No wo"""
hashtags = extract_hashtags(sample_text)
print(hashtags)


['#Doltiwal', '#xyzabc', '#Demonetization']


# Question 28: Remove <U+..> Symbols

In [88]:
def remove_u_plus_symbols(text):
    pattern = re.compile(r'<U\+\w+>')
    return pattern.sub('', text)

sample_text = "@Jags123456 Bharat band on 28??<ed><U+00A0><U+00BD><ed><U+00B8><U+0082>Those who are protesting #demonetization are all different party leaders"
cleaned_text = remove_u_plus_symbols(sample_text)
print(cleaned_text)


@Jags123456 Bharat band on 28??<ed><ed>Those who are protesting #demonetization are all different party leaders


# Question 30: Remove Words of Length 2 to 4

In [89]:
def remove_short_words(text):
    pattern = re.compile(r'\b\w{2,4}\b')
    return pattern.sub('', text)

sample_text = "The following example creates an ArrayList with a capacity of 50 elements. 4 elements are then added to the ArrayList and the ArrayList is trimmed accordingly."
output = remove_short_words(sample_text)
print(output)


 following example creates  ArrayList  a capacity   elements. 4 elements   added   ArrayList   ArrayList  trimmed accordingly.


# Question 29: Extract Dates from Text File

## Step 1: Create the text file

In [90]:
# Sample text to be stored in the file
sample_text = "Ron was born on 12-09-1992 and he was admitted to school on 15-12-1999."

# Create a file and write the sample text to it
with open("sample_text.txt", "w") as file:
    file.write(sample_text)

## Step 2: Write the program to extract dates

In [91]:
import re

# Step 1: Read the text from the file
with open("sample_text.txt", "r") as file:
    text = file.read()

# Step 2: Use a regular expression to find all dates in the format dd-mm-yyyy
date_pattern = r'\b\d{2}-\d{2}-\d{4}\b'
dates = re.findall(date_pattern, text)

# Step 3: Print the extracted dates
print("Extracted dates:", dates)


Extracted dates: ['12-09-1992', '15-12-1999']
