### 1. Classes and Objects: Meet the Heroes

#### 2.1 Defining a Class: The Blueprint of a Hero
A class is like a blueprint that defines the structure and behavior of an object. Let's create a generic Hero class that will serve as the base for our specialized heroes like archers and wizards.

In [154]:
class Hero:
    def __init__(self, name, level):
        self.name = name
        self.level = level

    def describe(self):
        return f"{self.name}, Level {self.level} Hero"

#### 2.2 Creating Objects: Summoning Heroes
An object is an instance of a class.

In [155]:
hero = Hero("Max", 5)
print(hero.name)
print(hero.describe())

Max
Max, Level 5 Hero


#### Dive deeper into the internals of classes and instances

In [156]:
class Hero:
    weight = 100

    def __init__(self, name, level):
        self.name = name
        self.level = level

    def describe(self):
        return f"{self.name}, Level {self.level} Hero"


hero1 = Hero("Merlin", 10)

In [157]:
Hero.__dict__

mappingproxy({'__module__': '__main__',
              'weight': 100,
              '__init__': <function __main__.Hero.__init__(self, name, level)>,
              'describe': <function __main__.Hero.describe(self)>,
              '__dict__': <attribute '__dict__' of 'Hero' objects>,
              '__weakref__': <attribute '__weakref__' of 'Hero' objects>,
              '__doc__': None})

In [158]:
hero1.__dict__

{'name': 'Merlin', 'level': 10}

In [159]:
print(hero1.weight)

100


In [160]:
hero1.__class__.__dict__  # Just for demonstration, never use code like this ;-)

mappingproxy({'__module__': '__main__',
              'weight': 100,
              '__init__': <function __main__.Hero.__init__(self, name, level)>,
              'describe': <function __main__.Hero.describe(self)>,
              '__dict__': <attribute '__dict__' of 'Hero' objects>,
              '__weakref__': <attribute '__weakref__' of 'Hero' objects>,
              '__doc__': None})

### 3. Inheritance
Inheritance allows a class to inherit attributes and methods from another class. 

In [161]:
class Archer(Hero):
    def describe(self):
        return f"{self.name}, Level {self.level} Archer"


class Wizard(Hero):
    def describe(self):
        return f"{self.name}, Level {self.level} Wizard"


archer = Archer("Robin", 10)
wizard = Wizard("Merlin", 12)

print(archer.describe())
print(wizard.describe())

Robin, Level 10 Archer
Merlin, Level 12 Wizard


Using `super()` to call methods of the parent class

In [162]:
class Archer(Hero):
    def __init__(self, name, level, arrow_count):
        super().__init__(name, level)
        self.arrow_count = arrow_count

    def describe(self):
        return f"{super().describe()}, Arrows: {self.arrow_count}"


class Wizard(Hero):
    def __init__(self, name, level, spell_count):
        super().__init__(name, level)
        self.spell_count = spell_count

    def describe(self):
        return f"{super().describe()}, Spells: {self.spell_count}"

In [163]:
archer = Archer("Robin", 10, 20)
wizard = Wizard("Merlin", 12, 5)

print(archer.describe())
print(wizard.describe())

Robin, Level 10 Hero, Arrows: 20
Merlin, Level 12 Hero, Spells: 5


### Private Attributes, getters and setters

In [164]:
class Hero:
    def __init__(self, name, level):
        self.name = name
        self._level = level

    @property
    def level(self):
        print("Getter used")
        return self._level

    @level.setter
    def level(self, new_level):
        if new_level > self._level:
            self._level = new_level
        else:
            print(
                f"Invalid level: {new_level}. Must be greater than current level {self._level}."
            )

    def describe(self):
        return f"{self.name}, Level {self.level} Hero"


hero1 = Hero("Merlin", 5)
print(hero1.level)
print(hero1._level)
hero1.level = 6
print(hero1.level)
hero1.level = 3

Getter used
5
5
Getter used
6
Invalid level: 3. Must be greater than current level 6.


#### Dunder methods

In [165]:
class Hero:
    def __init__(self, name, level):
        self.name = name
        self._level = level

    @property
    def level(self):
        print("Getter used")
        return self._level

    @level.setter
    def level(self, new_level):
        if new_level > self._level:
            self._level = new_level
        else:
            print(
                f"Invalid level: {new_level}. Must be greater than current level {self._level}."
            )

    def describe(self):
        return f"{self.name}, Level {self.level} Hero"

    def __str__(self):
        return f"{self.name}, Level {self._level} Hero"

    def __add__(self, other):
        return Hero(f"{self.name}&{other.name}", self.level + other.level)

In [166]:
hero1 = Hero("Merlin", 5)
hero2 = Hero("Melchor", 5)
print(hero1)
print(hero2)
print(hero1 + hero2)

Merlin, Level 5 Hero
Melchor, Level 5 Hero
Getter used
Getter used
Merlin&Melchor, Level 10 Hero


We have to create the subclasses again to be able to use the dunder methods from inside the childclass

In [167]:
class Archer(Hero):
    def __init__(self, name, level, arrow_count):
        super().__init__(name, level)
        self.arrow_count = arrow_count

    def describe(self):
        return f"{super().describe()}, Arrows: {self.arrow_count}"


class Wizard(Hero):
    def __init__(self, name, level, spell_count):
        super().__init__(name, level)
        self.spell_count = spell_count

    def describe(self):
        return f"{super().describe()}, Spells: {self.spell_count}"

In [168]:
class Team:
    def __init__(self, *heroes):
        self.heroes = heroes

    def describe(self):
        for hero in self.heroes:
            print(hero.describe())

In [169]:
archer = Archer("Robin", 10, 20)
wizard = Wizard("Merlin", 12, 5)

team = Team(archer, wizard)

team.describe()

Getter used
Robin, Level 10 Hero, Arrows: 20
Getter used
Merlin, Level 12 Hero, Spells: 5


#### Abstract Classes and methods

In [170]:
from abc import ABC, abstractmethod


class Hero(ABC):
    def __init__(self, name, level):
        self.name = name
        self._level = level

    @property
    def level(self):
        return self._level

    @level.setter
    def level(self, new_level):
        if new_level > self._level:
            self._level = new_level
        else:
            print(
                f"Invalid level: {new_level}. Must be greater than current level {self._level}."
            )

    @abstractmethod
    def describe(self):
        pass

    def __str__(self):
        return self.describe()

    def __add__(self, other):
        return Hero(f"{self.name}&{other.name}", self.level + other.level)

In [171]:
text2 = """

. conservation their of importance the and extinction imminent their of implications the discuss also We . survival its ensure to needed strategies the discuss and Quetzal Resplendent the of conservation and , ecology , biology the examine we , article this In . regulations hunting and initiatives protection habitat as such , efforts conservation of number a to subject being species the to led has This . decreasing steadily is size population its and , List Red IUCN the on vulnerable as listed now is It . overhunting and destruction habitat to due years recent in declined have estimates population but , abundant be to considered long was Quetzal Resplendent The . length1 in cm 52 reach can which , feathers tail long and plumage red and green bright its for notable is It . family the in species widespread most and best-known the is and family Trogonidae the of member a is It . America South and Central to native bird iridescent , colorful of species a is ) mocinno Pharomachrus ( Quetzal Resplendent The
"""

In [172]:
def flip_string(text):
  # Split the string on spaces
  words = text.split()

  # Reverse the order of the words
  flipped_words = words[::-1]

  # Join the flipped words back into a string with spaces
  return " ".join(flipped_words)

# Example usage
text = text2
flipped_text = flip_string(text)
print(flipped_text)

The Resplendent Quetzal ( Pharomachrus mocinno ) is a species of colorful , iridescent bird native to Central and South America . It is a member of the Trogonidae family and is the best-known and most widespread species in the family . It is notable for its bright green and red plumage and long tail feathers , which can reach 52 cm in length1 . The Resplendent Quetzal was long considered to be abundant , but population estimates have declined in recent years due to habitat destruction and overhunting . It is now listed as vulnerable on the IUCN Red List , and its population size is steadily decreasing . This has led to the species being subject to a number of conservation efforts , such as habitat protection initiatives and hunting regulations . In this article , we examine the biology , ecology , and conservation of the Resplendent Quetzal and discuss the strategies needed to ensure its survival . We also discuss the implications of their imminent extinction and the importance of thei

In [173]:
text3 = """.noitavresnoc rieht fo ecnatropmi eht dna noitcnitxe tnenimmi rieht fo snoitacilpmi eht ssucsid osla eW .lavivrus sti erusne ot dedeen seigetarts eht ssucsid dna lazteuQ tnednelpseR eht fo noitavresnoc dna ,ygoloce ,ygoloib eht enimaxe ew ,elcitra siht nI

.snoitaluger gnitnuh dna sevitaitini noitcetorp tatibah sa hcus ,stroffe noitavresnoc fo rebmun a ot tcejbus gnieb seiceps eht ot del sah sihT .gnisaerced ylidaets si ezis noitalupop sti dna ,tsiL deR NCUI eht no elbarenluv sa detsil won si tI .gnitnuhrevo dna noitcurtsed tatibah ot eud sraey tnecer ni denilced evah setamitse noitalupop tub ,tnadnuba eb ot deredisnoc gnol saw lazteuQ tnednelpseR ehT

.1htgnel ni mc 25 hcaer nac hcihw ,srehtaef liat gnol dna egamulp der dna neerg thgirb sti rof elbaton si tI .ylimaf eht ni seiceps daerpsediw tsom dna nwonk-tseb eht si dna ylimaf eadinogorT eht fo rebmem a si tI .aciremA htuoS dna lartneC ot evitan drib tnecsediri ,lufroloc fo seiceps a si )onnicom surhcamorahP( lazteuQ tnednelpseR ehT """

In [174]:
def reverse_sentence(sentence):
    return sentence[::-1]

# Example usage
original_sentence = text3
reversed_sentence = reverse_sentence(original_sentence)

print("Reversed:", reversed_sentence)

Reversed:  The Resplendent Quetzal (Pharomachrus mocinno) is a species of colorful, iridescent bird native to Central and South America. It is a member of the Trogonidae family and is the best-known and most widespread species in the family. It is notable for its bright green and red plumage and long tail feathers, which can reach 52 cm in length1.

The Resplendent Quetzal was long considered to be abundant, but population estimates have declined in recent years due to habitat destruction and overhunting. It is now listed as vulnerable on the IUCN Red List, and its population size is steadily decreasing. This has led to the species being subject to a number of conservation efforts, such as habitat protection initiatives and hunting regulations.

In this article, we examine the biology, ecology, and conservation of the Resplendent Quetzal and discuss the strategies needed to ensure its survival. We also discuss the implications of their imminent extinction and the importance of their co

In [175]:
ls

[34mCS50P[m[m/             pybasic.ipynb      typehinting.ipynb
oop.ipynb          [34mtesting[m[m/


In [176]:
import pandas as pd

# Define the functions for flipping
def flip_string(text):
  words = text.split()
  return " ".join(words[::-1])  # Reverse the order of words

def reverse_char(text):
  return text[::-1]  # Reverse characters

# Load the CSV data
df = pd.read_csv("dataprep2.csv")

# Add new columns with flipped outputs
df["new_output1"] = df["output"].apply(flip_string)
df["new_output2"] = df["output"].apply(reverse_char)

# Save the modified dataframe
df.to_csv("first_batch_data2.csv", index=False)

print("CSV processed and saved as first_batch_data2.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'dataprep2.csv'

In [None]:
import pandas as pd
import re

# Define the functions for flipping
def flip_string(text):
  words = text.split()
  return " ".join(words[::-1])  # Reverse the order of words

def reverse_char(text):
  return text[::-1]  # Reverse characters

def process_text_v3(sentence):
    # Step 1: Add spaces
    sentence = re.sub(r"(?<!\s)([!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~])(?=(\s|$))", r" \1", sentence)
    sentence = re.sub(r"([\(\[\{])", r"\1 ", sentence)
    
    # Step 2: Split and reverse
    words = sentence.split()
    words.reverse()
    
    # Step 4: Join the elements
    return " ".join(words)

# Load the CSV data
df = pd.read_csv("dataprep_.csv")

# Apply text processing and add a new column
df["new_output1"] = df["output"].apply(process_text_v3)
df["new_output2"] = df["output"].apply(reverse_char)

# Save the modified dataframe
df.to_csv("first_batch_data.csv", index=False)

print("CSV processed and saved as first_batch_data.csv")

CSV processed and saved as first_batch_data.csv


In [None]:
import pandas as pd

# Define the concatenation pattern
pattern = "<s>[INST] {} [/INST] {} </s>"

def process_row(row):
  instruction = row["instruction"]
  output = row["output"]
  # Concatenate data using the pattern
  train_data = pattern.format(instruction, output)
  return train_data

# Read the CSV file into a DataFrame
df = pd.read_csv("word_data.csv")

# Create a new empty column for train data
df["word_data_train"] = None

# Apply the processing function to each row (one cell at a time)
df["word_data_train"] = df.apply(process_row, axis=1)

# Load the "train_data" column as a separate DataFrame
train_df = df[["word_data_train"]]  # Select only the train_data column

# You can now save the train_df to a new CSV file if needed
train_df.to_csv("word_data_train.csv", index=False)

print("Processed data added to 'word_data_train' column.")


Processed data added to 'word_data_train' column.


In [None]:
import pandas as pd

# Define the concatenation pattern
pattern = "<s>[INST] {} [/INST] {} </s>"

def process_row(row):
  instruction = row["instruction"]
  output = row["output"]
  # Concatenate data using the pattern
  train_data = pattern.format(instruction, output)
  return train_data

# Read the CSV file into a DataFrame
df = pd.read_csv("char_data.csv")

# Create a new empty column for train data
df["char_train_data"] = None

# Apply the processing function to each row (one cell at a time)
df["char_train_data"] = df.apply(process_row, axis=1)

# Load the "train_data" column as a separate DataFrame
train_df = df[["char_train_data"]]  # Select only the train_data column

# You can now save the train_df to a new CSV file if needed
train_df.to_csv("char_train.csv", index=False)

print("Processed data added to 'char_train_data' column and exported char_train.csv")


Processed data added to 'char_train_data' column and exported char_train.csv


In [None]:
import pandas as pd

# Define the concatenation pattern
pattern = "<s>[INST] {} [/INST] {} </s>"

def process_row(row):
  instruction = row["instruction"]
  output = row["output"]
  # Concatenate data using the pattern
  train_data = pattern.format(instruction, output)
  return train_data

# Read the CSV file into a DataFrame
df = pd.read_csv("ordered.csv")

# Create a new empty column for train data
df["ordered"] = None

# Apply the processing function to each row (one cell at a time)
df["ordered"] = df.apply(process_row, axis=1)

# Load the "train_data" column as a separate DataFrame
train_df = df[["ordered"]]  # Select only the train_data column

# You can now save the train_df to a new CSV file if needed
train_df.to_csv("ordered.csv", index=False)

print("Processed data added to 'ordered' column.")


KeyError: 'instruction'

In [None]:
def check_new_words(sentence1, sentence2):
    words1 = sentence1.split()
    words2 = sentence2.split()
    
    combinations1 = [' '.join(words1[i:i+3]) for i in range(len(words1)-2)]
    combinations2 = [' '.join(words2[i:i+3]) for i in range(len(words2)-2)]
    
    same_combinations = set(combinations1) & set(combinations2)
    different_combinations = set(combinations2) - same_combinations
    
    new_words = []
    for combination in different_combinations:
        combination_words = combination.split()
        if len(combination_words) == 3:
            word1, word2, word3 = combination_words
            if word1 in words1 and word3 in words1:
                index1 = words1.index(word1)
                index3 = words1.index(word3)
                if index3 == index1 + 2:
                    new_words.append((word2, index1 + 1))
    
    new_sentence = []
    for i, word in enumerate(words1):
        if i not in [index for _, index in new_words]:
            new_sentence.append(word)
    
    new_sentence.extend([word for word, _ in new_words])
    
    return ' '.join(new_sentence)

sentence1 = "I live in a house factory cake. intend I live here longer."
sentence2 = "I live in the cake factory house. I intend to live here longer."

new_sentence = check_new_words(sentence1, sentence2)
print(new_sentence)

I live in a house factory cake. intend I live here longer.


In [None]:
def move_different_word_to_end(sentence1, sentence2):
    def get_combinations(sentence):
        words = sentence.split()
        return [" ".join(words[i:i+3]) for i in range(len(words) - 2)]

    def move_word_to_end(sentence, word):
        words = sentence.split()
        words.remove(word)
        return " ".join(words) + " " + word

    combinations1 = get_combinations(sentence1)
    combinations2 = get_combinations(sentence2)

    unique_combinations = [comb for comb in combinations2 if comb not in combinations1]

    for combination in unique_combinations:
        words1 = combination.split()
        words2 = sentence2.split()

        if len(words1) == 3 and words1[1] != words2[words2.index(words1[1]) - 1]:
            sentence2 = move_word_to_end(sentence2, words1[1])

    return sentence1, sentence2

# Example usage
sentence1 = "I live in a house factory cake. I intend to live here longer."
sentence2 = "I live in the cake factory house. I intend to live here longer."

result1, result2 = move_different_word_to_end(sentence1, sentence2)
print("Modified Sentence 1:", result1)
print("Modified Sentence 2:", result2)

Modified Sentence 1: I live in a house factory cake. I intend to live here longer.
Modified Sentence 2: live I intend to live here longer. in the cake factory house. I


In [None]:
def move_different_word_to_end(sentence1, sentence2):
    def get_combinations(sentence):
        words = sentence.split()
        return [" ".join(words[i:i+3]) for i in range(len(words) - 2)]

    def move_word_to_end(sentence, word):
        words = sentence.split()
        words.remove(word)
        return " ".join(words) + " " + word

    combinations1 = get_combinations(sentence1)
    combinations2 = get_combinations(sentence2)

    unique_combinations1 = [comb for comb in combinations1 if comb not in combinations2]
    unique_combinations2 = [comb for comb in combinations2 if comb not in combinations1]

    for combination in unique_combinations1:
        words1 = combination.split()
        words1_sentence = sentence1.split()

        if len(words1) == 3 and words1[1] != words1_sentence[words1_sentence.index(words1[1]) - 1]:
            sentence1 = move_word_to_end(sentence1, words1[1])

    for combination in unique_combinations2:
        words2 = combination.split()
        words2_sentence = sentence2.split()

        if len(words2) == 3 and words2[1] != words2_sentence[words2_sentence.index(words2[1]) - 1]:
            sentence2 = move_word_to_end(sentence2, words2[1])

    return sentence1, sentence2

# Example usage
sentence1 = "I live in a house factory cake. I intend to live here longer."
sentence2 = "I live in the cake factory house. I intend to live here longer."

result1, result2 = move_different_word_to_end(sentence1, sentence2)
print("Modified Sentence 1:", result1)
print("Modified Sentence 2:", result2)

Modified Sentence 1: live I intend to live here longer. in a house factory cake. I
Modified Sentence 2: live I intend to live here longer. in the cake factory house. I


In [None]:
import string

def move_different_word_to_end(sentence1, sentence2):
    def get_combinations(sentence):
        words = sentence.split()
        return [" ".join(words[i:i+3]) for i in range(len(words) - 2)]

    def move_word_to_end(sentence, word):
        words = sentence.split()
        words.remove(word)
        return " ".join(words) + " " + word

    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation))
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation))

    combinations1 = get_combinations(sentence1)
    combinations2 = get_combinations(sentence2)

    unique_combinations1 = [comb for comb in combinations1 if comb not in combinations2]
    unique_combinations2 = [comb for comb in combinations2 if comb not in combinations1]

    for combination in unique_combinations1:
        words = combination.split()
        if len(words) == 3:
            sentence1 = move_word_to_end(sentence1, words[1])

    for combination in unique_combinations2:
        words = combination.split()
        if len(words) == 3:
            sentence2 = move_word_to_end(sentence2, words[1])

    return sentence1, sentence2

# Example usage
sentence1 = "I live in a house factory cake. I intend to live here longer."
sentence2 = "I live in the cake factory house. I intend to live here longer."

result1, result2 = move_different_word_to_end(sentence1, sentence2)
print("Modified Sentence 1:", result1)
print("Modified Sentence 2:", result2)

Modified Sentence 1: live I intend to live here longer in a house factory cake I
Modified Sentence 2: live I intend to live here longer in the cake factory house I


In [None]:
import string

def move_different_word_to_end(sentence1, sentence2):
    def get_combinations(sentence):
        words = sentence.split()
        return [" ".join(words[i:i+3]) for i in range(len(words) - 2)]

    def move_word_to_end(sentence, word):
        words = sentence.split()
        words.remove(word)
        return " ".join(words) + " " + word

    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation))
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation))

    combinations1 = get_combinations(sentence1)
    combinations2 = get_combinations(sentence2)

    unique_combinations1 = [comb.split()[1] for comb in combinations1 if comb not in combinations2]
    unique_combinations2 = [comb.split()[1] for comb in combinations2 if comb not in combinations1]

    for word in unique_combinations1:
        sentence1 = move_word_to_end(sentence1, word)

    for word in unique_combinations2:
        sentence2 = move_word_to_end(sentence2, word)

    return sentence1, sentence2

# Example usage
sentence1 = "I live in a house factory cake. I intend to live here longer."
sentence2 = "I live in the house factory cake. I intend to live here longer."

result1, result2 = move_different_word_to_end(sentence1, sentence2)
print("Modified Sentence 1:", result1)
print("Modified Sentence 2:", result2)

Modified Sentence 1: I live factory cake I intend to live here longer in a house
Modified Sentence 2: I live factory cake I intend to live here longer in the house


In [None]:
import string

def move_different_word_to_end(sentence1, sentence2):
    def move_word_to_end(sentence, word):
        words = sentence.split()
        words.remove(word)
        return " ".join(words) + " " + word

    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation))
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation))

    words1 = set(sentence1.split())
    words2 = set(sentence2.split())

    unique_word1 = words1.difference(words2)
    unique_word2 = words2.difference(words1)

    if unique_word1:
        sentence1 = move_word_to_end(sentence1, unique_word1.pop())

    if unique_word2:
        sentence2 = move_word_to_end(sentence2, unique_word2.pop())

    return sentence1, sentence2

# Example usage
sentence1 = "I live in a house factory cake. I intend live here longer."
sentence2 = "I live in the house factory cake. I intend to live here longer."

result1, result2 = move_different_word_to_end(sentence1, sentence2)
print("Modified Sentence 1:", result1)
print("Modified Sentence 2:", result2)

Modified Sentence 1: I live in house factory cake I intend live here longer a
Modified Sentence 2: I live in the house factory cake I intend live here longer to


In [None]:
import string

def move_different_word_to_end(sentence1, sentence2):
    def move_word_to_end(sentence, unique_words):
        words = sentence.split()
        for word in unique_words:
            if word in words:
                words.remove(word)
        return " ".join(words) + " " + " ".join(unique_words)

    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation))
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation))

    words1 = set(sentence1.split())
    words2 = set(sentence2.split())

    unique_words1 = words1.difference(words2)
    unique_words2 = words2.difference(words1)

    sentence1 = move_word_to_end(sentence1, unique_words1)
    sentence2 = move_word_to_end(sentence2, unique_words2)

    return sentence1, sentence2

# Example usage
sentence1 = "I live in a factory house cake. intend live here longer."
sentence2 = "I live in the house factory cake. I intend to live here longer."

result1, result2 = move_different_word_to_end(sentence1, sentence2)
print("Modified Sentence 1:", result1)
print("Modified Sentence 2:", result2)

Modified Sentence 1: I live in factory house cake intend live here longer a
Modified Sentence 2: I live in house factory cake I intend live here longer to the


In [None]:
import string

def move_different_word_to_end(sentence1, sentence2):
    def move_words_to_end(sentence_words, words_to_move):
        retained_words = [(word, idx) for idx, word in enumerate(sentence_words) if word not in words_to_move]
        moved_words = [(word, idx) for idx, word in enumerate(sentence_words) if word in words_to_move]
        return retained_words, moved_words

    # Remove punctuation and convert sentences to lowercase
    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation)).lower()
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation)).lower()

    # Split sentences into words
    words1 = sentence1.split()
    words2 = sentence2.split()

    # Identify unique words
    unique_words1 = set(words1).difference(set(words2))
    unique_words2 = set(words2).difference(set(words1))

    # Move unique words to the end and get retained and moved words with their original indices
    retained_words1, moved_words1 = move_words_to_end(words1, unique_words1)
    retained_words2, moved_words2 = move_words_to_end(words2, unique_words2)

    # Combine retained and moved words to form the final sentences
    modified_sentence1 = " ".join(word for word, _ in retained_words1 + moved_words1)
    modified_sentence2 = " ".join(word for word, _ in retained_words2 + moved_words2)
    
    # Form retained sentences
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return modified_sentence1.strip(), modified_sentence2.strip(), retained_sentence1.strip(), retained_sentence2.strip(), retained_words1, moved_words1, retained_words2, moved_words2

# Example usage
sentence1 = "I live a factory house cake. I intend live here longer."
sentence2 = "I live in the house factory cake. I intend to live here longer."

result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = move_different_word_to_end(sentence1, sentence2)

print("Modified Sentence 1:", result1)
print("Retained Sentence 1:", retained_sentence1)
print("Retained Words 1:", retained_words1)
print("Moved Words 1:", moved_words1)

print("Modified Sentence 2:", result2)
print("Retained Sentence 2:", retained_sentence2)
print("Retained Words 2:", retained_words2)
print("Moved Words 2:", moved_words2)


Modified Sentence 1: i live factory house cake i intend live here longer a
Retained Sentence 1: i live factory house cake i intend live here longer
Retained Words 1: [('i', 0), ('live', 1), ('factory', 3), ('house', 4), ('cake', 5), ('i', 6), ('intend', 7), ('live', 8), ('here', 9), ('longer', 10)]
Moved Words 1: [('a', 2)]
Modified Sentence 2: i live house factory cake i intend live here longer in the to
Retained Sentence 2: i live house factory cake i intend live here longer
Retained Words 2: [('i', 0), ('live', 1), ('house', 4), ('factory', 5), ('cake', 6), ('i', 7), ('intend', 8), ('live', 10), ('here', 11), ('longer', 12)]
Moved Words 2: [('in', 2), ('the', 3), ('to', 9)]


In [None]:
import string

def move_different_word_to_end(sentence1, sentence2):
    def move_words_to_end(sentence_words, words_to_move):
        retained_words = [(word, idx) for idx, word in enumerate(sentence_words) if word not in words_to_move]
        moved_words = [(word, idx) for idx, word in enumerate(sentence_words) if word in words_to_move]
        return retained_words, moved_words

    # Remove punctuation and convert sentences to lowercase
    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation)).lower()
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation)).lower()

    # Split sentences into words
    words1 = sentence1.split()
    words2 = sentence2.split()

    # Identify unique words
    unique_words1 = set(words1).difference(set(words2))
    unique_words2 = set(words2).difference(set(words1))

    # Move unique words to the end and get retained and moved words with their original indices
    retained_words1, moved_words1 = move_words_to_end(words1, unique_words1)
    retained_words2, moved_words2 = move_words_to_end(words2, unique_words2)

    # Combine retained and moved words to form the final sentences
    modified_sentence1 = " ".join(word for word, _ in retained_words1 + moved_words1)
    modified_sentence2 = " ".join(word for word, _ in retained_words2 + moved_words2)
    
    # Form retained sentences
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return modified_sentence1.strip(), modified_sentence2.strip(), retained_sentence1.strip(), retained_sentence2.strip(), retained_words1, moved_words1, retained_words2, moved_words2

# Example usage
sentence1 = "I live a factory house cake. I intend live here longer."
sentence2 = "I live in the house factory cake. I intend to live here longer."

result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = move_different_word_to_end(sentence1, sentence2)

print("Modified Sentence 1:", result1)
print("Retained Sentence 1:", retained_sentence1)
print("Retained Words 1:", retained_words1)
print("Moved Words 1:", moved_words1)

print("Modified Sentence 2:", result2)
print("Retained Sentence 2:", retained_sentence2)
print("Retained Words 2:", retained_words2)
print("Moved Words 2:", moved_words2)


Modified Sentence 1: i live factory house cake i intend live here longer a
Retained Sentence 1: i live factory house cake i intend live here longer
Retained Words 1: [('i', 0), ('live', 1), ('factory', 3), ('house', 4), ('cake', 5), ('i', 6), ('intend', 7), ('live', 8), ('here', 9), ('longer', 10)]
Moved Words 1: [('a', 2)]
Modified Sentence 2: i live house factory cake i intend live here longer in the to
Retained Sentence 2: i live house factory cake i intend live here longer
Retained Words 2: [('i', 0), ('live', 1), ('house', 4), ('factory', 5), ('cake', 6), ('i', 7), ('intend', 8), ('live', 10), ('here', 11), ('longer', 12)]
Moved Words 2: [('in', 2), ('the', 3), ('to', 9)]


In [None]:
import string

def move_different_word_to_end(sentence1, sentence2):
    def move_words_to_end(sentence_words, words_to_move):
        retained_words = [(word, idx) for idx, word in enumerate(sentence_words) if word not in words_to_move]
        moved_words = [(word, idx) for idx, word in enumerate(sentence_words) if word in words_to_move]
        return retained_words, moved_words

    # Remove punctuation and convert sentences to lowercase
    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation)).lower()
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation)).lower()

    # Split sentences into words
    words1 = sentence1.split()
    words2 = sentence2.split()

    # Identify unique words
    unique_words1 = set(words1).difference(set(words2))
    unique_words2 = set(words2).difference(set(words1))

    # Move unique words to the end and get retained and moved words with their original indices
    retained_words1, moved_words1 = move_words_to_end(words1, unique_words1)
    retained_words2, moved_words2 = move_words_to_end(words2, unique_words2)

    # Combine retained and moved words to form the final sentences
    modified_sentence1 = " ".join(word for word, _ in retained_words1 + moved_words1)
    modified_sentence2 = " ".join(word for word, _ in retained_words2 + moved_words2)
    
    # Form retained sentences
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return modified_sentence1.strip(), modified_sentence2.strip(), retained_sentence1.strip(), retained_sentence2.strip(), retained_words1, moved_words1, retained_words2, moved_words2

def update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2):
    # Create a list to mark which indices in retained_words1 have been matched
    matched_indices = [False] * len(retained_words1)
    
    # Find words in retained_words2 that do not match their position in retained_words1
    additional_moved_words2 = []
    
    for idx2, (word2, pos2) in enumerate(retained_words2):
        found_match = False
        for idx1, (word1, pos1) in enumerate(retained_words1):
            if word2 == word1 and not matched_indices[idx1] and idx2 == idx1:
                matched_indices[idx1] = True
                found_match = True
                break
        if not found_match:
            additional_moved_words2.append((word2, pos2))
    
    # Update moved_words2 with these additional words
    moved_words2.extend(additional_moved_words2)
    
    # Filter retained_words2 to remove the words that have been moved
    retained_words2 = [(word, idx) for word, idx in retained_words2 if (word, idx) not in additional_moved_words2]
    
    return retained_words2, moved_words2

# Example usage
sentence1 = "I lives a factory house cake. I intend live here longer."
sentence2 = "I live in the house factory cake. I intend to live here longer."

result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = move_different_word_to_end(sentence1, sentence2)

# Update moved_words2 based on position comparison
retained_words2, moved_words2 = update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2)

# Print results
print("Modified Sentence 1:", result1)
print("Retained Sentence 1:", retained_sentence1)
print("Retained Words 1:", retained_words1)
print("Moved Words 1:", moved_words1)

print("Modified Sentence 2:", result2)
print("Retained Sentence 2:", retained_sentence2)
print("Retained Words 2:", retained_words2)
print("Moved Words 2:", moved_words2)

Modified Sentence 1: i factory house cake i intend live here longer lives a
Retained Sentence 1: i factory house cake i intend live here longer
Retained Words 1: [('i', 0), ('factory', 3), ('house', 4), ('cake', 5), ('i', 6), ('intend', 7), ('live', 8), ('here', 9), ('longer', 10)]
Moved Words 1: [('lives', 1), ('a', 2)]
Modified Sentence 2: i live house factory cake i intend live here longer in the to
Retained Sentence 2: i live house factory cake i intend live here longer
Retained Words 2: [('i', 0), ('house', 4)]
Moved Words 2: [('in', 2), ('the', 3), ('to', 9), ('live', 1), ('factory', 5), ('cake', 6), ('i', 7), ('intend', 8), ('live', 10), ('here', 11), ('longer', 12)]


In [None]:
import string
from collections import Counter

def move_different_word_to_end(sentence1, sentence2):
    def move_words_to_end(sentence_words, words_to_move):
        retained_words = []
        moved_words = []
        words_to_move_count = Counter(words_to_move)
        
        for idx, word in enumerate(sentence_words):
            if words_to_move_count[word] > 0:
                moved_words.append((word, idx))
                words_to_move_count[word] -= 1
            else:
                retained_words.append((word, idx))
        
        return retained_words, moved_words

    # Remove punctuation and convert sentences to lowercase
    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation)).lower()
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation)).lower()

    # Split sentences into words
    words1 = sentence1.split()
    words2 = sentence2.split()

    # Identify unique words
    counter1 = Counter(words1)
    counter2 = Counter(words2)
    unique_words1 = counter1 - counter2
    unique_words2 = counter2 - counter1

    # Move unique words to the end and get retained and moved words with their original indices
    retained_words1, moved_words1 = move_words_to_end(words1, unique_words1)
    retained_words2, moved_words2 = move_words_to_end(words2, unique_words2)

    # Combine retained and moved words to form the final sentences
    modified_sentence1 = " ".join(word for word, _ in retained_words1 + moved_words1)
    modified_sentence2 = " ".join(word for word, _ in retained_words2 + moved_words2)
    
    # Form retained sentences
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return modified_sentence1.strip(), modified_sentence2.strip(), retained_sentence1.strip(), retained_sentence2.strip(), retained_words1, moved_words1, retained_words2, moved_words2

def update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2):
    # Create a list to mark which indices in retained_words1 have been matched
    matched_indices = [False] * len(retained_words1)
    
    # Find words in retained_words2 that do not match their position in retained_words1
    additional_moved_words2 = []
    
    for idx2, (word2, pos2) in enumerate(retained_words2):
        found_match = False
        for idx1, (word1, pos1) in enumerate(retained_words1):
            if word2 == word1 and not matched_indices[idx1] and idx2 == idx1:
                matched_indices[idx1] = True
                found_match = True
                break
        if not found_match:
            additional_moved_words2.append((word2, pos2))
    
    # Update moved_words2 with these additional words
    moved_words2.extend(additional_moved_words2)
    
    # Filter retained_words2 to remove the words that have been moved
    retained_words2 = [(word, idx) for word, idx in retained_words2 if (word, idx) not in additional_moved_words2]
    
    return retained_words2, moved_words2

def get_final_sentences_and_words(sentence1, sentence2):
    result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = move_different_word_to_end(sentence1, sentence2)

    # Update moved_words2 based on position comparison
    retained_words2, moved_words2 = update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2)

    # Form the final retained sentences
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2

# Example usage
sentence1 = "I lives a factory house cake. I intend live here longer."
sentence2 = "I live in the house factory cake. I intend to live here longer."

result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = get_final_sentences_and_words(sentence1, sentence2)

# Print results
print("Modified Sentence 1:", result1)
print("Retained Sentence 1:", retained_sentence1)
print("Retained Words 1:", retained_words1)
print("Moved Words 1:", moved_words1)

print("Modified Sentence 2:", result2)
print("Retained Sentence 2:", retained_sentence2)
print("Retained Words 2:", retained_words2)
print("Moved Words 2:", moved_words2)

# Test with another example
sentence1 = "I lives a factory house cake. I intend live here longer."
sentence2 = "I live in the house factory cake. I intend to live here longer."

result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = get_final_sentences_and_words(sentence1, sentence2)

# Print results for the second example
print("\nSecond Example:")
print("Modified Sentence 1:", result1)
print("Retained Sentence 1:", retained_sentence1)
print("Retained Words 1:", retained_words1)
print("Moved Words 1:", moved_words1)

print("Modified Sentence 2:", result2)
print("Retained Sentence 2:", retained_sentence2)
print("Retained Words 2:", retained_words2)
print("Moved Words 2:", moved_words2)


Modified Sentence 1: i factory house cake i intend live here longer lives a
Retained Sentence 1: i factory house cake i intend live here longer
Retained Words 1: [('i', 0), ('factory', 3), ('house', 4), ('cake', 5), ('i', 6), ('intend', 7), ('live', 8), ('here', 9), ('longer', 10)]
Moved Words 1: [('lives', 1), ('a', 2)]
Modified Sentence 2: i house factory cake i intend here longer live in the to live
Retained Sentence 2: i cake i intend
Retained Words 2: [('i', 0), ('cake', 6), ('i', 7), ('intend', 8)]
Moved Words 2: [('live', 1), ('in', 2), ('the', 3), ('to', 9), ('live', 10), ('house', 4), ('factory', 5), ('here', 11), ('longer', 12)]

Second Example:
Modified Sentence 1: i factory house cake i intend live here longer lives a
Retained Sentence 1: i factory house cake i intend live here longer
Retained Words 1: [('i', 0), ('factory', 3), ('house', 4), ('cake', 5), ('i', 6), ('intend', 7), ('live', 8), ('here', 9), ('longer', 10)]
Moved Words 1: [('lives', 1), ('a', 2)]
Modified Sent

In [None]:
import string
from collections import Counter

def move_different_word_to_end(sentence1, sentence2):
    def move_words_to_end(sentence_words, words_to_move):
        retained_words = []
        moved_words = []
        words_to_move_count = Counter(words_to_move)
        
        for idx, word in enumerate(sentence_words):
            if words_to_move_count[word] > 0:
                moved_words.append((word, idx))
                words_to_move_count[word] -= 1
            else:
                retained_words.append((word, idx))
        
        return retained_words, moved_words

    # Remove punctuation and convert sentences to lowercase
    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation)).lower()
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation)).lower()

    # Split sentences into words
    words1 = sentence1.split()
    words2 = sentence2.split()

    # Identify unique words
    counter1 = Counter(words1)
    counter2 = Counter(words2)
    unique_words1 = counter1 - counter2
    unique_words2 = counter2 - counter1

    # Move unique words to the end and get retained and moved words with their original indices
    retained_words1, moved_words1 = move_words_to_end(words1, unique_words1)
    retained_words2, moved_words2 = move_words_to_end(words2, unique_words2)

    # Combine retained and moved words to form the final sentences
    modified_sentence1 = " ".join(word for word, _ in retained_words1 + moved_words1)
    modified_sentence2 = " ".join(word for word, _ in retained_words2 + moved_words2)
    
    # Form retained sentences
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return modified_sentence1.strip(), modified_sentence2.strip(), retained_sentence1.strip(), retained_sentence2.strip(), retained_words1, moved_words1, retained_words2, moved_words2

def update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2):
    # Create a list to mark which indices in retained_words1 have been matched
    matched_indices = [False] * len(retained_words1)
    
    # Find words in retained_words2 that do not match their position in retained_words1
    additional_moved_words2 = []
    
    for idx2, (word2, pos2) in enumerate(retained_words2):
        found_match = False
        for idx1, (word1, pos1) in enumerate(retained_words1):
            if word2 == word1 and not matched_indices[idx1] and idx2 == idx1:
                matched_indices[idx1] = True
                found_match = True
                break
        if not found_match:
            additional_moved_words2.append((word2, pos2))
    
    # Update moved_words2 with these additional words
    moved_words2.extend(additional_moved_words2)
    
    # Filter retained_words2 to remove the words that have been moved
    retained_words2 = [(word, idx) for word, idx in retained_words2 if (word, idx) not in additional_moved_words2]
    
    return retained_words2, moved_words2

def get_final_sentences_and_words(sentence1, sentence2):
    result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = move_different_word_to_end(sentence1, sentence2)

    # Update moved_words2 based on position comparison
    retained_words2, moved_words2 = update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2)

    # Form the final retained sentences
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2

# # Example usage
# sentence1 = "I lives a factory house cake. I intend live here longer."
# sentence2 = "I live in the house factory cake. I intend to live here longer."

# result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = get_final_sentences_and_words(sentence1, sentence2)

# # Print results
# print("Modified Sentence 1:", result1)
# print("Retained Sentence 1:", retained_sentence1)
# print("Retained Words 1:", retained_words1)
# print("Moved Words 1:", moved_words1)

# print("Modified Sentence 2:", result2)
# print("Retained Sentence 2:", retained_sentence2)
# print("Retained Words 2:", retained_words2)
# print("Moved Words 2:", moved_words2)

# Test with another example
sentence1 = "I lives in a factory house cake. I intend live here longer."
sentence2 = "I live in the house factory cake. I intend to live here longer."

result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = get_final_sentences_and_words(sentence1, sentence2)

# Print results for the second example
print("\nSecond Example:")
print("Modified Sentence 1:", result1)
print("Retained Sentence 1:", retained_sentence1)
print("Retained Words 1:", retained_words1)
print("Moved Words 1:", moved_words1)

print("Modified Sentence 2:", result2)
print("Retained Sentence 2:", retained_sentence2)
print("Retained Words 2:", retained_words2)
print("Moved Words 2:", moved_words2)



Second Example:
Modified Sentence 1: i in factory house cake i intend live here longer lives a
Retained Sentence 1: i in factory house cake i intend live here longer
Retained Words 1: [('i', 0), ('in', 2), ('factory', 4), ('house', 5), ('cake', 6), ('i', 7), ('intend', 8), ('live', 9), ('here', 10), ('longer', 11)]
Moved Words 1: [('lives', 1), ('a', 3)]
Modified Sentence 2: i in house factory cake i intend live here longer live the to
Retained Sentence 2: i in cake i intend live here longer
Retained Words 2: [('i', 0), ('in', 2), ('cake', 6), ('i', 7), ('intend', 8), ('live', 10), ('here', 11), ('longer', 12)]
Moved Words 2: [('live', 1), ('the', 3), ('to', 9), ('house', 4), ('factory', 5)]


In [None]:
import string
from collections import Counter

def move_different_word_to_end(sentence1, sentence2):
    def move_words_to_end(sentence_words, words_to_move):
        retained_words = []
        moved_words = []
        words_to_move_count = Counter(words_to_move)
        
        for idx, word in enumerate(sentence_words):
            if words_to_move_count[word] > 0:
                moved_words.append((word, idx))
                words_to_move_count[word] -= 1
            else:
                retained_words.append((word, idx))
        
        return retained_words, moved_words

    # Remove punctuation and convert sentences to lowercase
    sentence1 = sentence1.translate(str.maketrans('', '', string.punctuation)).lower()
    sentence2 = sentence2.translate(str.maketrans('', '', string.punctuation)).lower()

    # Split sentences into words
    words1 = sentence1.split()
    words2 = sentence2.split()

    # Identify unique words
    counter1 = Counter(words1)
    counter2 = Counter(words2)
    unique_words1 = counter1 - counter2
    unique_words2 = counter2 - counter1

    # Move unique words to the end and get retained and moved words with their original indices
    retained_words1, moved_words1 = move_words_to_end(words1, unique_words1)
    retained_words2, moved_words2 = move_words_to_end(words2, unique_words2)

    # Combine retained and moved words to form the final sentences
    modified_sentence1 = " ".join(word for word, _ in retained_words1 + moved_words1)
    modified_sentence2 = " ".join(word for word, _ in retained_words2 + moved_words2)
    
    # Form retained sentences
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return modified_sentence1.strip(), modified_sentence2.strip(), retained_sentence1.strip(), retained_sentence2.strip(), retained_words1, moved_words1, retained_words2, moved_words2

def update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2):
    # Create a list to mark which indices in retained_words1 have been matched
    matched_indices = [False] * len(retained_words1)
    
    # Find words in retained_words2 that do not match their position in retained_words1
    additional_moved_words2 = []
    
    for idx2, (word2, pos2) in enumerate(retained_words2):
        found_match = False
        for idx1, (word1, pos1) in enumerate(retained_words1):
            if word2 == word1 and not matched_indices[idx1] and idx2 == idx1:
                matched_indices[idx1] = True
                found_match = True
                break
        if not found_match:
            additional_moved_words2.append((word2, pos2))
    
    # Update moved_words2 with these additional words
    moved_words2.extend(additional_moved_words2)
    
    # Filter retained_words2 to remove the words that have been moved
    retained_words2 = [(word, idx) for word, idx in retained_words2 if (word, idx) not in additional_moved_words2]
    
    return retained_words2, moved_words2

def get_final_sentences_and_words(sentence1, sentence2):
    result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = move_different_word_to_end(sentence1, sentence2)

    # Update moved_words2 based on position comparison
    retained_words2, moved_words2 = update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2)

    # Form the final retained sentences
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2

# Test with another example
sentence1 = "canteen be a great place to catch up with friends and i have always loved spend time in a laboratories"
sentence2 = "canteen is a great place to catch up with friends and i have always loved spending time in laboratories"

result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = get_final_sentences_and_words(sentence1, sentence2)

# Print results for the second example
print("\nExample: \n")
print("Modified Sentence 1:", result1)
print("Retained Sentence 1:", retained_sentence1)
print("Retained Words 1:", retained_words1)
print("Moved Words 1:", moved_words1)
print("\n")
print("Modified Sentence 2:", result2)
print("Retained Sentence 2:", retained_sentence2)
print("Retained Words 2:", retained_words2)
print("Moved Words 2:", moved_words2)


# Sort moved words 2 by the original index for the second example
sorted_moved_words2 = sorted(moved_words2, key=lambda x: x[1])
# Create a dictionary with the word as the key and the index as the value
moved_words2_dict = {word: index for word, index in sorted_moved_words2}

# Output in JSON format for the second example
moved_words2_json = json.dumps(moved_words2_dict, indent=4)
print("Moved Words 2 (JSON) for the second example:")
print(moved_words2_json)


Example: 

Modified Sentence 1: canteen great place to catch up with friends and i have always loved time in a laboratories be a spend
Retained Sentence 1: canteen great place to catch up with friends and i have always loved time in a laboratories
Retained Words 1: [('canteen', 0), ('great', 3), ('place', 4), ('to', 5), ('catch', 6), ('up', 7), ('with', 8), ('friends', 9), ('and', 10), ('i', 11), ('have', 12), ('always', 13), ('loved', 14), ('time', 16), ('in', 17), ('a', 18), ('laboratories', 19)]
Moved Words 1: [('be', 1), ('a', 2), ('spend', 15)]


Modified Sentence 2: canteen a great place to catch up with friends and i have always loved time in laboratories is spending
Retained Sentence 2: canteen laboratories
Retained Words 2: [('canteen', 0), ('laboratories', 18)]
Moved Words 2: [('is', 1), ('spending', 15), ('a', 2), ('great', 3), ('place', 4), ('to', 5), ('catch', 6), ('up', 7), ('with', 8), ('friends', 9), ('and', 10), ('i', 11), ('have', 12), ('always', 13), ('loved', 14), 

In [None]:
import re
import json
from collections import Counter

def move_different_word_to_end(sentence1, sentence2):
    def move_words_to_end(sentence_words, words_to_move):
        retained_words = []
        moved_words = []
        words_to_move_count = Counter(words_to_move)
        
        for idx, word in enumerate(sentence_words):
            if words_to_move_count[word] > 0:
                moved_words.append((word, idx))
                words_to_move_count[word] -= 1
            else:
                retained_words.append((word, idx))
        
        return retained_words, moved_words

    # Remove punctuation using regex and convert sentences to lowercase
    sentence1 = re.sub(r'[^\w\s]', '', sentence1).lower()
    sentence2 = re.sub(r'[^\w\s]', '', sentence2).lower()

    # Split sentences into words
    words1 = sentence1.split()
    words2 = sentence2.split()

    # Identify unique words
    counter1 = Counter(words1)
    counter2 = Counter(words2)
    unique_words1 = counter1 - counter2
    unique_words2 = counter2 - counter1

    # Move unique words to the end and get retained and moved words with their original indices
    retained_words1, moved_words1 = move_words_to_end(words1, unique_words1)
    retained_words2, moved_words2 = move_words_to_end(words2, unique_words2)

    return retained_words1, moved_words1, retained_words2, moved_words2

def update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2):
    # Create a list to store words to move based on positional mismatch
    moved_words2_part2 = []

    # Track matched positions to handle duplicates correctly
    matched_indices_retained1 = [False] * len(retained_words1)

    # Find words in retained_words2 that do not match their position in retained_words1
    for idx2, (word2, pos2) in enumerate(retained_words2):
        found_match = False
        for idx1, (word1, pos1) in enumerate(retained_words1):
            if not matched_indices_retained1[idx1] and word2 == word1 and idx1 == idx2:
                matched_indices_retained1[idx1] = True
                found_match = True
                break
        if not found_match:
            moved_words2_part2.append((word2, pos2))
    
    # Update moved_words2 with these additional words
    moved_words2.extend(moved_words2_part2)
    
    # Filter retained_words2 to remove the words that have been moved
    retained_words2 = [(word, idx) for word, idx in retained_words2 if (word, idx) not in moved_words2_part2]
    
    return retained_words2, moved_words2

def get_final_sentences_and_words(sentence1, sentence2):
    retained_words1, moved_words1, retained_words2, moved_words2 = move_different_word_to_end(sentence1, sentence2)

    # Update moved_words2 based on position comparison
    retained_words2, moved_words2 = update_moved_words_based_on_position(retained_words1, retained_words2, moved_words2)

    # Form the final sentences
    modified_sentence1 = " ".join(word for word, _ in retained_words1 + moved_words1)
    modified_sentence2 = " ".join(word for word, _ in retained_words2 + moved_words2)
    retained_sentence1 = " ".join(word for word, _ in retained_words1)
    retained_sentence2 = " ".join(word for word, _ in retained_words2)

    return modified_sentence1.strip(), modified_sentence2.strip(), retained_sentence1.strip(), retained_sentence2.strip(), retained_words1, moved_words1, retained_words2, moved_words2

# Example usage
sentence1 = "Canteen be a great place to catch up with friends and I have always loved spend time in a laboratories."
sentence2 = "Canteen is a great place to catch up with friends and I have always loved spending time in laboratories."

result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = get_final_sentences_and_words(sentence1, sentence2)

# Print results
print("Modified Sentence 1:", result1)
print("Retained Sentence 1:", retained_sentence1)
print("Retained Words 1:", retained_words1)
print("Moved Words 1:", moved_words1)

print("Modified Sentence 2:", result2)
print("Retained Sentence 2:", retained_sentence2)
print("Retained Words 2:", retained_words2)
print("Moved Words 2:", moved_words2)

# Sort moved words 2 by the original index
sorted_moved_words2 = sorted(moved_words2, key=lambda x: x[1])
# Create a dictionary with the word as the key and the index as the value
moved_words2_dict = {word: index for word, index in sorted_moved_words2}

# Output in JSON format
moved_words2_json = json.dumps(moved_words2_dict, indent=4)
print("Moved Words 2 (JSON):")
print(moved_words2_json)

# Test with another example
sentence1 = "Canteen be a great place to catch up with friends and I have always loved spend time in a laboratories."
sentence2 = "Canteen is a great place to catch up with friends and I have always loved spending time in laboratories."

result1, result2, retained_sentence1, retained_sentence2, retained_words1, moved_words1, retained_words2, moved_words2 = get_final_sentences_and_words(sentence1, sentence2)

# Print results for the second example
print("\nSecond Example:")
print("Modified Sentence 1:", result1)
print("Retained Sentence 1:", retained_sentence1)
print("Retained Words 1:", retained_words1)
print("Moved Words 1:", moved_words1)

print("Modified Sentence 2:", result2)
print("Retained Sentence 2:", retained_sentence2)
print("Retained Words 2:", retained_words2)
print("Moved Words 2:", moved_words2)

# Sort moved words 2 by the original index for the second example
sorted_moved_words2 = sorted(moved_words2, key=lambda x: x[1])
# Create a dictionary with the word as the key and the index as the value
moved_words2_dict = {word: index for word, index in sorted_moved_words2}

# Output in JSON format for the second example
moved_words2_json = json.dumps(moved_words2_dict, indent=4)
print("Moved Words 2 (JSON) for the second example:")
print(moved_words2_json)


Modified Sentence 1: canteen great place to catch up with friends and i have always loved time in a laboratories be a spend
Retained Sentence 1: canteen great place to catch up with friends and i have always loved time in a laboratories
Retained Words 1: [('canteen', 0), ('great', 3), ('place', 4), ('to', 5), ('catch', 6), ('up', 7), ('with', 8), ('friends', 9), ('and', 10), ('i', 11), ('have', 12), ('always', 13), ('loved', 14), ('time', 16), ('in', 17), ('a', 18), ('laboratories', 19)]
Moved Words 1: [('be', 1), ('a', 2), ('spend', 15)]
Modified Sentence 2: canteen laboratories is spending a great place to catch up with friends and i have always loved time in
Retained Sentence 2: canteen laboratories
Retained Words 2: [('canteen', 0), ('laboratories', 18)]
Moved Words 2: [('is', 1), ('spending', 15), ('a', 2), ('great', 3), ('place', 4), ('to', 5), ('catch', 6), ('up', 7), ('with', 8), ('friends', 9), ('and', 10), ('i', 11), ('have', 12), ('always', 13), ('loved', 14), ('time', 16), 

In [None]:
import json
import re

def remove_repeats(sentence):
    # Patterns for two-word and three-word repeats
    pattern_one_word = re.compile(r'\b(\w+)\b\s+\1\b')
    pattern_two_words = re.compile(r'\b(\w+\s+\w+)\b\s+\1\b')
    pattern_three_words = re.compile(r'\b(\w+\s+\w+\s+\w+)\b\s+\1\b')
    
    # Replace repeats with the first occurrence
    while True:
        new_sentence = sentence
        new_sentence = pattern_one_word.sub(r'\1', new_sentence)
        new_sentence = pattern_two_words.sub(r'\1', new_sentence)
        new_sentence = pattern_three_words.sub(r'\1', new_sentence)
        if new_sentence == sentence:
            break
        sentence = new_sentence
    
    return sentence

# Example usage
sentence1 = "Canteen be a great place to catch up with friends, and I have always loved spend time in a. laboratories"
sentence2 = "Canteen Canteen is the great place to catch up there are maan y with friends and I have always loved spending time in laboratories. manipal hospitals"


sentence1 = re.sub(r"[^a-zA-Z0-9 ]", "", sentence1)
sentence1 = remove_repeats(sentence1)
sentence2 = re.sub(r"[^a-zA-Z0-9 ]", "", sentence2)
sentence2 = remove_repeats(sentence2)

def remove_repeated_words(sentence1, sentence2):
    combined_sentence = sentence1 + " " + sentence2
    words = combined_sentence.split()
    word_count = {}
    
    # Count the occurrences of each word in the combined sentences
    for word in words:
        word_count[word] = word_count.get(word, 0) + 1
    
    # Collect unique words from each sentence
    unique_words1 = [word for word in sentence1.split() if word_count[word] == 1]
    unique_words2 = [word for word in sentence2.split() if word_count[word] == 1]
    
    return unique_words1, unique_words2

def find_unique_words_with_indices(sentence1, sentence2):
    # Remove repeated words from both sentences
    unique_words1, unique_words2 = remove_repeated_words(sentence1, sentence2)
    
    # Create sets of unique words
    set1 = set(unique_words1)
    set2 = set(unique_words2)
    
    # Find words in sentence2 that are not in sentence1
    difference = set2 - set1
    
    # Find indices of these words in sentence2
    words_with_indices = {}
    words2 = sentence2.split()
    for index, word in enumerate(words2):
        if word in difference:
            words_with_indices[word] = index
    
    return words_with_indices

result = find_unique_words_with_indices(sentence1, sentence2)
result_json = json.dumps(result, indent=4)
print(result_json)


{
    "is": 1,
    "the": 2,
    "there": 8,
    "are": 9,
    "maan": 10,
    "y": 11,
    "spending": 19,
    "manipal": 23,
    "hospitals": 24
}


In [179]:
import pandas as pd
import re

def has_non_english_word(text):
  """
  Checks if a string contains characters outside the basic English alphabet.

  Args:
      text: The string to check.

  Returns:
      "YES" if the string contains non-English characters, "NO" otherwise.
  """
  pattern = r"[^\s\w\-]"  # Matches characters that are not whitespace, word characters, or hyphen
  return "YES" if re.search(pattern, text) else "NO"

# Create a sample DataFrame
data = {'text': ['This is English text.', 'This has 中文.', 'This text is all English.']}
df = pd.DataFrame(data)

# Apply the function to the 'text' column
df['has_non_english'] = df['text'].apply(has_non_english_word)

# Print the DataFrame
print(df)


                        text has_non_english
0      This is English text.             YES
1               This has 中文.             YES
2  This text is all English.             YES
